Example #1
# Shared imports for the examples on this page
import luigi
import pandas as pd
from luigi.contrib.postgres import CopyToTable
# get_postgres_credentials is a project-specific helper (a sketch follows this example)


class BiasFairnessMetadata(CopyToTable):

    #### Bucket where all ingestions will be stored in AWS S3
    bucket = luigi.Parameter()

    #### Defining the ingestion type to Luigi (`consecutive` or `initial`)
    ingest_type = luigi.Parameter()

    def requires(self):
        return BiasFairnessUnitTest(ingest_type=self.ingest_type,
                                    bucket=self.bucket)

    credentials = get_postgres_credentials("conf/local/credentials.yaml")

    user = credentials['user']
    password = credentials['pass']
    database = credentials['db']
    host = credentials['host']
    port = credentials['port']
    table = 'dpa_metadata.bias_fairness'

    ## Metadata columns saved to the RDS table
    columns = [("execution_time", "VARCHAR"), ("value_k", "VARCHAR"),
               ("v_group", "VARCHAR"), ("FOR_p", "VARCHAR"),
               ("FNR_p", "VARCHAR")]

    # Assumed path, following the naming pattern of the other tasks on this
    # page (the original snippet referenced an undefined csv_local_file)
    csv_local_file = "src/pipeline/luigi/luigi_tmp_files/bias_fairness_metadata.csv"

    def rows(self):
        reader = pd.read_csv(self.csv_local_file, header=None)

        for element in reader.itertuples(index=False):
            yield element
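Every example on this page builds its Postgres connection through get_postgres_credentials, a project helper the snippets never show. A minimal sketch of what it might look like, assuming conf/local/credentials.yaml is a flat mapping with the five keys the tasks read (user, pass, db, host, port):

import yaml


def get_postgres_credentials(credentials_file):
    """Hypothetical helper: load Postgres credentials from a YAML file.

    Assumed file layout (flat mapping):
        user: my_user
        pass: my_password
        db: my_database
        host: my-instance.xxxx.us-east-1.rds.amazonaws.com
        port: "5432"
    """
    with open(credentials_file) as f:
        return yaml.safe_load(f)

If the real file nests these keys (for example under a postgres: block), the helper would need to index into that block before returning.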
Example #2
class ExtractUnitTest(CopyToTable):

    #### Defining the ingestion type to Luigi (`consecutive` or `initial`)
    ingest_type = luigi.Parameter()

    def requires(self):
        return APIDataIngestion(self.ingest_type)

    credentials = get_postgres_credentials("conf/local/credentials.yaml")

    user = credentials['user']
    password = credentials['pass']
    database = credentials['db']
    host = credentials['host']
    port = credentials['port']
    table = 'dpa_unittest.extract'

    columns = [("Date", "VARCHAR"),
               ("Result", "VARCHAR")]

    # Assumed path (the original snippet referenced an undefined csv_local_file)
    csv_local_file = "src/pipeline/luigi/luigi_tmp_files/extract_unittest.csv"

    def rows(self):
        reader = pd.read_csv(self.csv_local_file, header=None)

        # Fail fast before any row reaches Postgres: with header=None, the
        # test result written upstream sits at row 1, column 1
        if "FAILED" in reader.iloc[1, 1]:
            raise TypeError("FAILED, you have an empty dataframe")

        for element in reader.itertuples(index=False):
            yield element
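Because ExtractUnitTest declares APIDataIngestion in requires(), Luigi runs the ingestion first and only copies the unit-test result into dpa_unittest.extract once it succeeds. A minimal local invocation, assuming the class is importable (the parameter value is illustrative):

import luigi

# Schedules APIDataIngestion, then ExtractUnitTest, without a central scheduler
luigi.build(
    [ExtractUnitTest(ingest_type="initial")],
    local_scheduler=True,
)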
Example #3
class Monitor(CopyToTable):

    #### Bucket where all ingestions will be stored in AWS S3
    bucket = luigi.Parameter()

    #### Defining the ingestion type to Luigi (`consecutive` or `initial`)
    ingest_type = luigi.Parameter()

    def requires(self):
        return StorePredictionsApi(ingest_type=self.ingest_type,
                                   bucket=self.bucket)

    credentials = get_postgres_credentials("conf/local/credentials.yaml")

    user = credentials['user']
    password = credentials['pass']
    database = credentials['db']
    host = credentials['host']
    port = credentials['port']
    table = 'dpa_monitor.monitor'

    ## Monitoring columns saved to the RDS table
    columns = [
        ("id_client", "VARCHAR"),
        ("prediction_date", "VARCHAR"),
        ("model_label", "VARCHAR"),
        ("score_label_0", "VARCHAR"),
        ("score_label_1", "VARCHAR"),
    ]

    # Assumed path (the original snippet referenced an undefined csv_local_file)
    csv_local_file = "src/pipeline/luigi/luigi_tmp_files/monitor.csv"

    def rows(self):
        reader = pd.read_csv(self.csv_local_file, header=0)

        for element in reader.itertuples(index=False):
            yield element
Example #4
class ModelTrainingMetadata(CopyToTable):

    #### Bucket where all ingestions will be stored in AWS S3
    bucket = luigi.Parameter()

    #### Defining the ingestion type to Luigi (`consecutive` or `initial`)
    ingest_type = luigi.Parameter()

    csv_local_file = "src/pipeline/luigi/luigi_tmp_files/models_training_metadata.csv"

    def requires(self):
        return ModelTrainingTest(ingest_type=self.ingest_type,
                                 bucket=self.bucket)

    credentials = get_postgres_credentials("conf/local/credentials.yaml")

    user = credentials['user']
    password = credentials['pass']
    database = credentials['db']
    host = credentials['host']
    port = credentials['port']
    table = 'dpa_metadata.model_training'

    ## TODO: adapt to the correct number of columns
    columns = [("execution_time", "VARCHAR"), ("no_models_trained", "VARCHAR"),
               ("type_models_trained", "VARCHAR")]

    def rows(self):
        reader = pd.read_csv(self.csv_local_file, header=None)

        for element in reader.itertuples(index=False):
            yield element
Example #5
class SaveS3Metadata(CopyToTable):

    #### Bucket where all ingestions will be stored in AWS S3
    bucket = luigi.Parameter()

    #### Defining the ingestion type to Luigi (`consecutive` or `initial`)
    ingest_type = luigi.Parameter()

    csv_local_file = "src/pipeline/luigi/luigi_tmp_files/saveS3_metadata.csv"

    def requires(self):
        return SaveS3UnitTest(ingest_type=self.ingest_type, bucket=self.bucket)

    credentials = get_postgres_credentials("conf/local/credentials.yaml")

    user = credentials['user']
    password = credentials['pass']
    database = credentials['db']
    host = credentials['host']
    port = credentials['port']
    table = 'dpa_metadata.saveS3'

    ## Postgres table layout
    columns = [("save_time", "VARCHAR"), ("s3_bucket_name", "VARCHAR"),
               ("s3_key_name", "VARCHAR"), ("df_shape", "VARCHAR")]

    def rows(self):
        reader = pd.read_csv(self.csv_local_file, header=None)

        for element in reader.itertuples(index=False):
            yield element
Example #6
class ModelTrainingTest(CopyToTable):

    #### Bucket where all ingestions will be stored in AWS S3
    bucket = luigi.Parameter()

    #### Defining the ingestion type to Luigi (`consecutive` or `initial`)
    ingest_type = luigi.Parameter()

    def requires(self):
        return ModelTraining(ingest_type=self.ingest_type, bucket=self.bucket)

    credentials = get_postgres_credentials("conf/local/credentials.yaml")

    user = credentials['user']
    password = credentials['pass']
    database = credentials['db']
    host = credentials['host']
    port = credentials['port']
    table = 'dpa_unittest.model_training'

    columns = [("Date", "VARCHAR"),
               ("Result", "VARCHAR")]

    # Assumed path (the original snippet referenced an undefined csv_local_file)
    csv_local_file = "src/pipeline/luigi/luigi_tmp_files/model_training_unittest.csv"

    def rows(self):
        reader = pd.read_csv(self.csv_local_file, header=None)

        # Fail fast before any row reaches Postgres
        if "FAILED" in reader.iloc[1, 1]:
            raise TypeError("FAILED, your X_train has fewer than 50 rows")

        for element in reader.itertuples(index=False):
            yield element
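All six tasks repeat the same credential wiring and the same rows() loop. A light refactor into a shared base class would keep behavior identical while cutting the duplication; this is a sketch built on the assumed get_postgres_credentials helper above, not part of the original code:

import luigi
import pandas as pd
from luigi.contrib.postgres import CopyToTable


class CsvToPostgres(CopyToTable):
    """Hypothetical shared base: subclasses set table, columns, csv_local_file,
    and csv_header (None for headerless files, 0 when the CSV has a header row)."""

    csv_header = None

    credentials = get_postgres_credentials("conf/local/credentials.yaml")
    user = credentials['user']
    password = credentials['pass']
    database = credentials['db']
    host = credentials['host']
    port = credentials['port']

    def rows(self):
        reader = pd.read_csv(self.csv_local_file, header=self.csv_header)
        for element in reader.itertuples(index=False):
            yield element

Each task above would then shrink to its parameters, requires(), table, columns, and csv_local_file.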