import sys

import pandas as pd
import numpy as np

from pyspark.sql import SparkSession

from lendingclub_scoring.config.ConfigProvider import read_config, setupMlflowConf
from lendingclub_scoring.pipelines.LendingClubTrainingPipeline import LendingClubTrainingPipeline

# Training job script: obtain a Spark session, load the training config,
# configure MLflow from it, then run the LendingClub training pipeline.
spark = (
    SparkSession.builder
    .appName('ForecastingTest')
    .getOrCreate()
)

# The first command-line argument is forwarded to read_config as its second
# argument (presumably an environment/profile selector -- TODO confirm).
config_arg = sys.argv[1]
conf = read_config('train_config.yaml', config_arg)
setupMlflowConf(conf)

# Pull the pipeline inputs out of the config before constructing the pipeline.
data_path = conf['data-path']
model_name = conf['model-name']
training_pipeline = LendingClubTrainingPipeline(spark, data_path, model_name)
training_pipeline.run()
# Example no. 2
0
import sys

import pandas as pd
import numpy as np
from mlflow.tracking import MlflowClient

from pyspark.sql import SparkSession

from lendingclub_scoring.config.ConfigProvider import read_config, setupMlflowConf
from lendingclub_scoring.pipelines.LendingClubTrainingPipeline import LendingClubTrainingPipeline
from lendingclub_scoring.pipelines.LendingClubModelEvaluationPipeline import LendingClubModelEvaluationPipeline
from lendingclub_scoring.pipelines.LendingClubConsumerPipeline import LendingClubConsumerPipeline

# End-to-end integration script, first stage: configure Spark and MLflow,
# run the training pipeline, then check the experiment for a candidate run.
spark = (
    SparkSession.builder
    .appName('ForecastingTest')
    .getOrCreate()
)

# The first command-line argument is forwarded to read_config as its second
# argument (presumably an environment/profile selector -- TODO confirm).
config_arg = sys.argv[1]
conf = read_config('e2e_int_config.yaml', config_arg)

# The value returned by setupMlflowConf is used below as the load target of
# the "mlflow-experiment" reader.
experimentID = setupMlflowConf(conf)

# Forwarded to the training pipeline as its `limit` keyword argument
# (presumably caps the amount of input data -- verify against the pipeline).
limit = 100_000

# train
data_path = conf['data-path']
model_name = conf['model-name']
training_pipeline = LendingClubTrainingPipeline(
    spark,
    data_path,
    model_name,
    limit=limit,
)
training_pipeline.run()

# Read the experiment back through Spark and require at least one row whose
# `candidate` tag equals 'true'.
experiment_reader = spark.read.format("mlflow-experiment")
spark_df = experiment_reader.load(experimentID)
candidate_rows = spark_df.where("tags.candidate='true'")
assert candidate_rows.count() > 0

# deploy
p = LendingClubModelEvaluationPipeline(spark,