Example #1
@classmethod
def setUpClass(cls):
    # setup_pyspark() is the project's test helper returning a SparkSession builder
    cls.spark = setup_pyspark().appName(
        "test-profiles-local").getOrCreate()
    cls.sc = cls.spark.sparkContext
    # Three-row fixture DataFrame; column c contains a null
    cls.df = cls.sc.parallelize([
        Row(a="foo", b=1, c=5),
        Row(a="bar", b=2, c=6),
        Row(a="baz", b=3, c=None)
    ]).toDF()
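The fixture only builds the session and the DataFrame; a minimal profiling call over it might look like the sketch below, using the standard pydeequ ColumnProfilerRunner API (the names spark and df stand in for the fixture attributes):

from pydeequ.profiles import ColumnProfilerRunner

result = ColumnProfilerRunner(spark).onData(df).run()
for col, profile in result.profiles.items():
    # Each profile carries per-column statistics such as completeness
    # and the approximate number of distinct values
    print(col, profile)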
Example #2
@classmethod
def setUpClass(cls):
    cls.spark = setup_pyspark().appName(
        "test-anomalydetection-local").getOrCreate()
    cls.sc = cls.spark.sparkContext

    # Baseline two-row DataFrame
    cls.df_1 = cls.sc.parallelize([
        Row(a=3, b=0, c="colder"),
        Row(a=3, b=5, c="bolder"),
    ]).toDF()

    # Larger follow-up DataFrame for the anomaly-detection tests
    cls.df_2 = cls.sc.parallelize([
        Row(a=3, b=0, c="foo"),
        Row(a=3, b=5, c="zoo"),
        Row(a=100, b=5, c="who"),
        Row(a=2, b=30, c="email"),
        Row(a=10, b=5, c="cards"),
    ]).toDF()

    # Two identical single-row DataFrames
    cls.df_3 = cls.sc.parallelize([Row(a=1, b=23, c="pool")]).toDF()

    cls.df_4 = cls.sc.parallelize([Row(a=1, b=23, c="pool")]).toDF()
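A sketch of how such fixtures are typically consumed, following the standard pydeequ anomaly-check flow (the repository and key names here are illustrative, and spark, df_1, df_2 stand for the fixture attributes):

from pydeequ.analyzers import Size
from pydeequ.anomaly_detection import RelativeRateOfChangeStrategy
from pydeequ.repository import InMemoryMetricsRepository, ResultKey
from pydeequ.verification import VerificationSuite, VerificationResult

repo = InMemoryMetricsRepository(spark)

# Record the baseline Size metric from df_1 (2 rows)
base_key = ResultKey(spark, ResultKey.current_milli_time() - 1000)
VerificationSuite(spark).onData(df_1) \
    .useRepository(repo).saveOrAppendResult(base_key) \
    .addAnomalyCheck(RelativeRateOfChangeStrategy(maxRateIncrease=2.0), Size()) \
    .run()

# df_2 has 5 rows, a 2.5x increase, so the same check should now flag an anomaly
curr_key = ResultKey(spark, ResultKey.current_milli_time())
result = VerificationSuite(spark).onData(df_2) \
    .useRepository(repo).saveOrAppendResult(curr_key) \
    .addAnomalyCheck(RelativeRateOfChangeStrategy(maxRateIncrease=2.0), Size()) \
    .run()
VerificationResult.checkResultsAsDataFrame(spark, result).show()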
Example #3
@classmethod
def setUpClass(cls):
    cls.spark = setup_pyspark().appName("test-analyzers-local").getOrCreate()
    # The AnalysisRunner is created through a PyDeequSession rather than directly
    cls.pydeequ_session = PyDeequSession(cls.spark)
    cls.AnalysisRunner = cls.pydeequ_session.createAnalysisRunner()
    cls.sc = cls.spark.sparkContext
    cls.df = cls.sc.parallelize(
        [Row(a="foo", b=1, c=5, d=1), Row(a="bar", b=2, c=6, d=3), Row(a="baz", b=3, c=None, d=1)]
    ).toDF()
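With this runner in place, a test would chain analyzers onto the fixture; a minimal sketch using the standard pydeequ analyzer API (runner, df, and spark stand for the fixture attributes above):

from pydeequ.analyzers import AnalyzerContext, Completeness, Size

result = runner.onData(df) \
    .addAnalyzer(Size()) \
    .addAnalyzer(Completeness("c")) \
    .run()
# Size should report 3 rows; Completeness("c") 2/3, since one c value is None
AnalyzerContext.successMetricsAsDataFrame(spark, result).show()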
Example #4
@classmethod
def setUpClass(cls):
    cls.spark = setup_pyspark().appName(
        "test-analyzers-local").getOrCreate()
    cls.ConstraintSuggestionRunner = ConstraintSuggestionRunner(cls.spark)
    cls.sc = cls.spark.sparkContext
    # Same three-row fixture as Example #1, now paired with a suggestion runner
    cls.df = cls.sc.parallelize([
        Row(a="foo", b=1, c=5),
        Row(a="bar", b=2, c=6),
        Row(a="baz", b=3, c=None)
    ]).toDF()
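Typical use of the suggestion runner over this fixture, following the standard pydeequ pattern (spark and df stand for the fixture attributes):

import json

from pydeequ.suggestions import ConstraintSuggestionRunner, DEFAULT

suggestions = ConstraintSuggestionRunner(spark) \
    .onData(df) \
    .addConstraintRule(DEFAULT()) \
    .run()
# The result is a plain dict of suggested constraints, one entry per finding
print(json.dumps(suggestions, indent=2))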
Example #5
@classmethod
def setUpClass(cls):
    cls.spark = setup_pyspark().appName(
        "test-analyzers-local").getOrCreate()
    # This fixture exposes both an analysis runner and a verification suite
    cls.AnalysisRunner = AnalysisRunner(cls.spark)
    cls.VerificationSuite = VerificationSuite(cls.spark)
    cls.sc = cls.spark.sparkContext
    cls.df = cls.sc.parallelize([
        Row(a="foo", b=1, c=5),
        Row(a="bar", b=2, c=6),
        Row(a="baz", b=3, c=None)
    ]).toDF()
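A minimal verification sketch over this fixture, using the standard pydeequ Check API (the check name is illustrative; spark and df stand for the fixture attributes):

from pydeequ.checks import Check, CheckLevel
from pydeequ.verification import VerificationResult, VerificationSuite

check = Check(spark, CheckLevel.Warning, "fixture sanity check")
result = VerificationSuite(spark).onData(df) \
    .addCheck(check.hasSize(lambda sz: sz == 3)
                   .isComplete("a")) \
    .run()
# isComplete("c") would fail here, since one of the three c values is None
VerificationResult.checkResultsAsDataFrame(spark, result).show()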
Example #6
@classmethod
def setUpClass(cls):
    cls.spark = setup_pyspark().appName("test-analyzers-local").getOrCreate()
    cls.pydeequ_session = PyDeequSession(cls.spark)
    cls.AnalysisRunner = cls.pydeequ_session.createAnalysisRunner()
    cls.ColumnProfilerRunner = ColumnProfilerRunner(cls.spark)
    cls.ConstraintSuggestionRunner = ConstraintSuggestionRunner(cls.spark)
    cls.sc = cls.spark.sparkContext
    # The same two rows are materialized both as a Spark and as a pandas DataFrame;
    # pandasDF appears to be pandas.DataFrame imported under an alias, np is numpy
    data = [("foo", 1, True, 1.0, float("nan")), ("bar", 2, False, 2.0, float("nan"))]
    cls.pyspark_df = cls.spark.createDataFrame(data, schema=["strings", "ints", "bools", "floats", "nans"])
    cls.pandas_df = pandasDF(
        {
            "strings": ["foo", "bar"],
            "ints": [1, 2],
            "bools": [True, False],
            "floats": [1.0, 2.0],
            "nans": [np.nan, np.nan],
        }
    )
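Since the two fixtures describe the same data, the Spark frame could equally be built straight from the pandas one; a sketch in plain PySpark, nothing pydeequ-specific (spark, pandas_df, and pyspark_df stand for the fixture attributes):

# createDataFrame accepts a pandas DataFrame directly
spark_from_pandas = spark.createDataFrame(pandas_df)
assert spark_from_pandas.count() == pyspark_df.count()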
Example #7
@classmethod
def setUpClass(cls):
    # Minimal fixture: only a session and its SparkContext are needed here
    cls.spark = setup_pyspark().appName(
        "test-scala-utils-local").getOrCreate()
    cls.sc = cls.spark.sparkContext
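None of these excerpts shows the matching teardown; a minimal sketch, assuming nothing beyond a plain unittest.TestCase, would be:

@classmethod
def tearDownClass(cls):
    # Release the local Spark resources created in setUpClass
    cls.spark.stop()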