def run_training() -> None:
    """Train the model."""
    _logger.info(f"training the pipeline with version: {_version}")
    # read training data
    data = load_dataset(file_name=config.DATA_FILE)

    # divide train and test
    X_train, X_test, y_train, y_test = train_test_split(
        data[config.FEATURES], data[config.TARGET], test_size=0.1, random_state=0
    )  # we are setting the seed here

    pipeline.marathon_pipeline.fit(X_train[config.FEATURES], y_train)

    _logger.info(f"saving model version: {_version}")
    save_pipeline(pipeline_to_persist=pipeline.marathon_pipeline)

    print("########################################")
    print("Test prediction: ")

    test_data = load_dataset(file_name='test.csv')
    single_test_json = test_data[0:1].to_json(orient='records')

    # When
    subject = make_prediction(input_data=single_test_json)
    print(subject)
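Every run_training variant in this listing calls load_dataset and save_pipeline helpers that are defined elsewhere in its package (the keyword argument names vary: file_name vs. filename, pipeline_to_persist vs. pipeline_to_save). A minimal sketch of what such helpers typically look like, assuming pandas CSVs and joblib persistence; the directory names, the PIPELINE_SAVE_FILE prefix and the _version value are hypothetical placeholders, not taken from any of the projects below:

import logging
from pathlib import Path

import joblib
import pandas as pd
from sklearn.pipeline import Pipeline

_logger = logging.getLogger(__name__)
_version = "0.0.1"  # hypothetical; packages usually read this from a VERSION file

# hypothetical stand-ins for values the examples pull from their config modules
DATASET_DIR = Path("datasets")
TRAINED_MODEL_DIR = Path("trained_models")
PIPELINE_SAVE_FILE = "model_pipeline_output_v"


def load_dataset(*, file_name: str) -> pd.DataFrame:
    """Read a CSV from the dataset directory into a DataFrame."""
    return pd.read_csv(DATASET_DIR / file_name)


def save_pipeline(*, pipeline_to_persist: Pipeline) -> None:
    """Persist the fitted pipeline under a versioned file name."""
    save_file_name = f"{PIPELINE_SAVE_FILE}{_version}.pkl"
    joblib.dump(pipeline_to_persist, TRAINED_MODEL_DIR / save_file_name)
    _logger.info(f"saved pipeline: {save_file_name}")
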
Example #2
def run_training() -> None:
    """Train the model."""

    # read training data
    data = load_dataset(file_name=config.TRAINING_DATA_FILE)

    # print(data.head())

    # divide train and test
    X_train, X_test, y_train, y_test = train_test_split(
        data[config.FEATURES],
        data[config.TARGET],
        test_size=0.1,
        random_state=0)  # we are setting the seed here

    pipeline.end_to_end_pipeline.fit(X_train[config.FEATURES], y_train)

    pred = pipeline.end_to_end_pipeline.predict(X_test)

    # evaluate on the held-out test set: mse, rmse and r2
    print("test mse: {}".format(int(mean_squared_error(y_test, pred))))
    print("test rmse: {}".format(
        int(np.sqrt(mean_squared_error(y_test, pred)))))
    print("test r2: {}".format(r2_score(y_test, pred)))
    print(pipeline.end_to_end_pipeline.named_steps["Linear_model"].coef_)

    _version = "0.0.1"
    _logger.info(f"saving model version: {_version}")
    save_pipeline(pipeline_to_persist=pipeline.end_to_end_pipeline)
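None of these listings include the pipeline.py module itself. For the coefficient dump above to work, the pipeline needs an estimator step registered under the name "Linear_model". A minimal sketch of what pipeline.end_to_end_pipeline could look like; the scaling step is a hypothetical stand-in for the project's real feature-engineering steps:

from sklearn.linear_model import LinearRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

# hypothetical definition; the real pipeline.py would put the project's own
# preprocessing steps ahead of the estimator
end_to_end_pipeline = Pipeline([
    ("scaler", StandardScaler()),
    ("Linear_model", LinearRegression()),
])
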
Example #3
def run_training() -> None:
    """Train the model."""

    # read training data
    data = load_dataset(file_name=TRAINING_DATA_FILE)

    # divide train and test
    X_train, X_test, Y_train, Y_test = train_test_split(
        data[FEATURES], data[TARGET], test_size=0.2, random_state=0)

    # transform the target
    Y_train = np.log(Y_train)
    Y_test = np.log(Y_test)

    price_pipe.fit(X_train[FEATURES], Y_train)

    _logger.info(f"saving model version: {_version}")
    save_pipeline(pipeline_to_save=price_pipe)
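This example (and several below) log-transforms the target before fitting, but never uses the held-out X_test / Y_test. If you do evaluate, remember the pipeline predicts in log space: either compare in log space or invert the transform with np.exp. A sketch of how that evaluation could go, reusing the names from the example above:

import numpy as np
from sklearn.metrics import mean_squared_error

# the pipeline was fit on log(Y_train), so predictions come out in log space
log_pred = price_pipe.predict(X_test[FEATURES])

# either compare in log space ...
mse_log = mean_squared_error(Y_test, log_pred)

# ... or invert the transform and compare on the original scale
mse_original = mean_squared_error(np.exp(Y_test), np.exp(log_pred))
print(f"test mse (log scale): {mse_log:.4f}, test mse (original scale): {mse_original:.2f}")
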
def run_training() -> None:
    """Train the model"""

    data = load_dataset(file_name=config.TRAINING_DATA_FILE)

    # separate features and target (no hold-out split in this version)
    X, y = data[config.FEATURES], data[config.TARGET]

    pipeline.fit(X, y)

    _logger.info(f"saving model version: {_version}")
    save_pipeline(pipeline_to_persist=pipeline)
def run_training() -> None:
    """Train the model"""

    # read training data
    data = load_dataset(filename=config.TRAINING_DATA_FILE)

    # train test split
    X_train, X_test, y_train, y_test = train_test_split(
        data[config.FEATURES],
        data[config.TARGET],
        test_size=config.TEST_SIZE,
        random_state=config.RANDOM_STATE)

    pipeline.energy_pipe.fit(X_train[config.FEATURES], y_train)

    _logger.info(f"saving model version: {_version}")
    save_pipeline(pipeline_to_save=pipeline.energy_pipe)
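The example above reads TEST_SIZE and RANDOM_STATE from config instead of hardcoding them, which is the pattern the other variants would also benefit from. For reference, a minimal sketch of the kind of config module these examples assume; every value here is a hypothetical placeholder and would normally be defined per project:

# config.py -- hypothetical values for illustration only
TRAINING_DATA_FILE = "train.csv"
TARGET = "target"
FEATURES = ["feature_1", "feature_2", "feature_3"]

TEST_SIZE = 0.1
RANDOM_STATE = 0

PIPELINE_NAME = "energy_pipe"
PIPELINE_SAVE_FILE = f"{PIPELINE_NAME}_output_v"
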
Example #6
def run_training() -> None:

    data = load_dataset(file_name=config.TRAINING_DATA_FILE)

    # divide train and test
    X_train, X_test, y_train, y_test = train_test_split(data[config.FEATURES],
                                                        data[config.TARGET],
                                                        test_size=0.1,
                                                        random_state=0)

    # transform the target
    y_train = np.log(y_train)
    y_test = np.log(y_test)

    pipeline.price_pipe.fit(X_train[config.FEATURES], y_train)
    _logger.info(f"saving model version: {_version}")
    save_pipeline(pipeline_to_persist=pipeline.price_pipe)
Example #7
def run_training() -> None:
    """Train the model."""

    # read training data
    data = load_dataset(file_name=config.TRAINING_DATA_FILE)

    # divide train and test
    X_train, X_test, y_train, y_test = train_test_split(
        data[config.FEATURES],
        data[config.TARGET],
        test_size=0.1,
        random_state=0)  # we are setting the seed here

    pipeline.price_pipe.fit(X_train[config.FEATURES], y_train)

    _logger.info(f'saving model version: {_version}')
    save_pipeline(pipeline_to_persist=pipeline.price_pipe)
def run_training() -> None:
    """Train the model."""

    # read training data
    data = load_dataset(file_name=config.DATASET_FILE)

    # divide train and test
    X_train, X_test, y_train, y_test = train_test_split(
        data.drop(config.TARGET, axis=1),
        data[config.TARGET],
        test_size=0.2,
        random_state=0)  # we are setting the seed here

    pipeline.titanic_pipe.fit(X_train[config.FEATURES], y_train)

    _logger.info(f"saving model version: {_version}")
    save_pipeline(pipeline_to_persist=pipeline.titanic_pipe)
Example #9
def run_training() -> None:
    """Train the model."""

    # read training data
    data = load_dataset(file_name=config.TRAINING_DATA_FILE)

    # split data into train and test data
    X_train, X_test, y_train, y_test = train_test_split(data[config.FEATURES],
                                                        data[config.TARGET],
                                                        test_size=0.1,
                                                        random_state=0)

    # transform the target
    y_train = np.log(y_train)

    pipeline.price_pipe.fit(X_train[config.FEATURES], y_train)

    save_pipeline(pipeline_to_persist=pipeline.price_pipe)
Example #10
def run_training() -> None:
    """Train the model."""

    # read training data
    data = load_dataset(file_name=config.TRAINING_DATA_FILE)

    # divide the dataset into training and testing
    X_train, X_test, y_train, y_test = train_test_split(
        data[config.FEATURES],
        data[config.TARGET],
        test_size=0.1,
        random_state=0)

    # transform the target
    y_train = np.log(y_train)

    pipeline.price_pipe.fit(X_train[config.FEATURES], y_train)

    _logger.info(f"saving model version: {_version}")
    save_pipeline(pipeline_to_persist=pipeline.price_pipe)
Example #11
def run_training() -> None:
    """Train the model."""

    # read training data
    data = load_dataset(file_name=config.TRAINING_DATA_FILE)
    # _data = pd.read_csv(f"{config.DATASET_DIR}/{config.TRAINING_DATA_FILE}")
    # print(_data.head())

    # divide train and test
    X_train, X_test, y_train, y_test = train_test_split(
        data[config.FEATURES], data[config.TARGET],
        random_state=0)  # we are setting the seed here

    # binarize the target: map the positive class label "M" to 1, anything else to 0
    y_train = y_train.apply(lambda x: 1 if x == "M" else 0)

    pipeline.price_pipe.fit(X_train[config.FEATURES], y_train)

    _logger.info(f"saving model version: {_version}")
    save_pipeline(pipeline_to_persist=pipeline.price_pipe)
Example #12
def run_training() -> None:
    """Train the model."""

    # read training data
    data = load_dataset(file_path_name=config.TRAINING_DATA_FILE)

    # divide train and test
    X_train, X_test, y_train, y_test = train_test_split(
        data.iloc[:, :-1], data[config.TARGET], test_size=0.1,
        random_state=0)  # we are setting the seed here

    # transform the target
    y_train = np.log(y_train)

    #print('Data divided into training and test')
    pipeline.price_pipe.fit(X_train, y_train)

    _logger.info(f"saving model version: {_version}")
    save_pipeline(pipeline_to_persist=pipeline.price_pipe)
Example #13
def run_training() -> None:
    """Entenar el modelo."""

    # Lee los datos de entrenamiento
    data = load_dataset(file_name=config.TRAINING_DATA_FILE)

    # Divide entre set de entrenamiento y prueba
    X_train, X_test, y_train, y_test = train_test_split(
        data[config.FEATURES],
        data[config.TARGET],
        test_size=0.1,
        random_state=0)  # indicamos la semilla!!!

    # Transformamos la variable objetivo
    y_train = np.log(y_train)

    pipeline.price_pipe.fit(X_train[config.FEATURES], y_train)

    _logger.info(f"saving model version: {_version}")
    save_pipeline(pipeline_to_persist=pipeline.price_pipe)
Example #14
def run_training() -> None:
    """Train the model."""

    # read training data
    data = load_dataset(file_name=config.TRAINING_DATA_FILE)

    # divide train and test
    X_train, X_test, y_train, y_test = train_test_split(
        data[config.FEATURES],
        data[config.TARGET],
        test_size=0.1,
        random_state=0)  # we are setting the seed here

    # transform the target
    y_train = np.log(y_train)
    y_test = np.log(y_test)

    pipeline.price_pipe.fit(X_train[config.FEATURES], y_train)

    save_pipeline(pipeline=pipeline.price_pipe)
def run_training() -> None:
    """モデルを学習する。"""

    # 学習データの読み込み
    data = load_dataset(file_name=config.TRAINING_DATA_FILE)

    # 学習データとテストデータを分割
    X_train, X_test, y_train, y_test = train_test_split(
        data[config.FEATURES],
        data[config.TARGET],
        test_size=0.1,
        random_state=0)  # ここにシードを設定しています

    # ターゲットを変換する
    y_train = np.log(y_train)
    y_test = np.log(y_test)

    pipeline.price_pipe.fit(X_train[config.FEATURES], y_train)

    _logger.info(f'saving model version: {_version}')
    save_pipeline(pipeline_to_persist=pipeline.price_pipe)
Example #16
def run_training() -> None:
    """Train the model."""

    # read training data
    data = load_dataset(file_name=config.TRAINING_DATA_FILE)

    # divide train and test
    X_train, X_test, y_train, y_test = train_test_split(
        data[config.FEATURES],
        data[config.TARGET],
        test_size=0.1,
        random_state=0)  # we are setting the seed here

    # transform the target to log scale, which is what this pipeline expects
    y_train = np.log(y_train)
    y_test = np.log(y_test)

    pipeline.price_pipe.fit(X_train[config.FEATURES], y_train)

    # after fitting, persist the pipeline
    _logger.info(f'saving model version: {_version}')
    save_pipeline(pipeline_to_persist=pipeline.price_pipe)
def run_training() -> None:
    """Train the model."""

    # show where the data is being loaded from
    print(f"Current directory: {os.getcwd()}")
    print(f"Train path: {config.DATASET_DIR}")

    # read training data
    data = load_dataset(file_name=config.TRAINING_DATA_FILE)

    # divide train and test
    X_train, X_test, y_train, y_test = train_test_split(
        data[config.FEATURES],
        data[config.TARGET],
        test_size=0.1,
        random_state=0)  # we are setting the seed here

    # transform the target
    y_train = np.log(y_train)

    pipeline.price_pipe.fit(X_train[config.FEATURES], y_train)
    #joblib.dump(pipeline.price_pipe, config.PIPELINE_NAME)
    save_pipeline(pipeline_to_persist=pipeline.price_pipe)
def run_training() -> None:
    """Train the model."""

    # read training data
    data = load_dataset(file_name=config.TRAINING_DATA_FILE)

    # divide train and test
    X_train, X_test, y_train, y_test = train_test_split(
        data[config.FEATURES],
        data[config.TARGET],
        test_size=0.1,
        random_state=0)  # we are setting the seed here

    # transform the target
    y_train = np.log(y_train)
    y_test = np.log(y_test)

    # fit all the steps defined in pipeline.py; each custom class implements
    # __init__, a fit that returns self, and a preprocessing/transform method
    # that returns X, which is the pattern expected for custom pipeline steps
    pipeline.price_pipe.fit(X_train[config.FEATURES], y_train)

    _logger.info(f'saving model version: {_version}')
    save_pipeline(pipeline_to_persist=pipeline.price_pipe)
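
The comments in the last example describe the convention used for the custom steps inside pipeline.py: each class implements __init__, a fit that returns self, and a method that returns the transformed X. A minimal sketch of one such step, written against scikit-learn's BaseEstimator/TransformerMixin interface; the class name, the "Missing" placeholder and the variables handling are illustrative, not taken from any of the projects above:

import pandas as pd
from sklearn.base import BaseEstimator, TransformerMixin


class CategoricalImputer(BaseEstimator, TransformerMixin):
    """Replace missing values in categorical columns with 'Missing'."""

    def __init__(self, variables=None):
        # accept a single column name or a list of column names
        self.variables = [variables] if isinstance(variables, str) else variables

    def fit(self, X: pd.DataFrame, y: pd.Series = None) -> "CategoricalImputer":
        # nothing to learn; return self so the step can be chained in a Pipeline
        return self

    def transform(self, X: pd.DataFrame) -> pd.DataFrame:
        # return a transformed copy so the original DataFrame is left untouched
        X = X.copy()
        for feature in self.variables:
            X[feature] = X[feature].fillna("Missing")
        return X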