import sys import pandas as pd import numpy as np from mlflow.tracking import MlflowClient from pyspark.sql import SparkSession from lendingclub_scoring.config.ConfigProvider import read_config, setupMlflowConf from lendingclub_scoring.pipelines.LendingClubTrainingPipeline import LendingClubTrainingPipeline from lendingclub_scoring.pipelines.LendingClubModelEvaluationPipeline import LendingClubModelEvaluationPipeline from lendingclub_scoring.pipelines.LendingClubConsumerPipeline import LendingClubConsumerPipeline spark = SparkSession.builder.appName('ForecastingTest').getOrCreate() conf = read_config('e2e_int_config.yaml', sys.argv[1]) experimentID = setupMlflowConf(conf) limit = 100000 # train p = LendingClubTrainingPipeline(spark, conf['data-path'], conf['model-name'], limit=limit) p.run() spark_df = spark.read.format("mlflow-experiment").load(experimentID) assert spark_df.where("tags.candidate='true'").count() > 0 # deploy p = LendingClubModelEvaluationPipeline(spark,
"""Training job entry point.

Creates (or reuses) a Spark session, loads ``train_config.yaml`` through
``read_config`` together with the first command-line argument, applies the
MLflow configuration, and runs ``LendingClubTrainingPipeline``.
"""
import sys

import numpy as np
import pandas as pd
from pyspark.sql import SparkSession

from lendingclub_scoring.config.ConfigProvider import read_config, setupMlflowConf
from lendingclub_scoring.pipelines.LendingClubTrainingPipeline import LendingClubTrainingPipeline

# Obtain the Spark session first, exactly as the job has always done.
session_builder = SparkSession.builder.appName('ForecastingTest')
spark = session_builder.getOrCreate()

# NOTE(review): the meaning of the first CLI argument is defined by
# read_config (presumably an environment/profile selector) -- confirm there.
cli_argument = sys.argv[1]
conf = read_config('train_config.yaml', cli_argument)

# MLflow must be configured before the pipeline is constructed and run.
setupMlflowConf(conf)

# Config values are looked up only after setupMlflowConf has seen `conf`.
data_path = conf['data-path']
model_name = conf['model-name']

training_pipeline = LendingClubTrainingPipeline(spark, data_path, model_name)
training_pipeline.run()
"""Consumer job entry point.

Creates (or reuses) a Spark session, loads ``consumer_config.yaml`` through
``read_config`` together with the first command-line argument, applies the
MLflow configuration, runs ``LendingClubConsumerPipeline``, and finally
prints what was written to the configured output path.
"""
import os
import sys

from pyspark.sql import SparkSession

from lendingclub_scoring.pipelines.LendingClubConsumerPipeline import LendingClubConsumerPipeline
from lendingclub_scoring.config.ConfigProvider import read_config, setupMlflowConf

# Obtain the Spark session first, exactly as the job has always done.
session_builder = SparkSession.builder.appName('Test')
spark = session_builder.getOrCreate()

# NOTE(review): the meaning of the first CLI argument is defined by
# read_config (presumably an environment/profile selector) -- confirm there.
cli_argument = sys.argv[1]
conf = read_config('consumer_config.yaml', cli_argument)

# MLflow must be configured before the pipeline is constructed and run.
setupMlflowConf(conf)

# Positional constructor arguments, gathered in the order the pipeline
# receives them (after `spark`).
pipeline_keys = ('data-path', 'output-path', 'model-name', 'stage')
pipeline_args = [conf[key] for key in pipeline_keys]

consumer_pipeline = LendingClubConsumerPipeline(spark, *pipeline_args)
consumer_pipeline.run()

# Read the output back and display up to 1000 rows without truncating
# column values.
written_output = spark.read.load(conf['output-path'])
written_output.show(1000, False)