def test_get_output(self):
    """get_output() should read the JVM stream via toString().strip()."""
    mock_jvm = Mock()
    stream = ByteArrayOutputStream()
    stream.jvm = mock_jvm
    stream.get_output()
    # The mocked JVM records the call chain; verify strip() was invoked.
    stream.jvm_obj.toString().strip.assert_called()
def test_output(self):
    # Run two uniqueness checks and compare the ANSI-colored console report
    # byte-for-byte (blue = info, red = failure, green = success).
    check = Check(self.df).hasUniqueKey("_1").hasUniqueKey("_1", "_2")
    baos = ByteArrayOutputStream()
    reporter = ConsoleReporter(baos)
    check.run([reporter])
    expected_output = """
\x1b[34mChecking [_1: bigint, _2: string]\x1b[0m
\x1b[34mIt has a total number of 2 columns and 3 rows.\x1b[0m
\x1b[31m- Column _1 is not a key (1 non-unique tuple).\x1b[0m
\x1b[32m- Columns _1, _2 are a key.\x1b[0m
""".strip()
    self.assertEqual(baos.get_output(), expected_output)
def test_jvm_obj(self):
    """jvm_obj raises AttributeError before jvm is set and is cached afterwards."""
    jvm = Mock()
    baos = ByteArrayOutputStream()
    with self.assertRaises(AttributeError):
        jvm_obj = baos.jvm_obj
    # check that on the second call ByteArrayOutputStream returns the same jvm_obj
    # Fixed: the kwarg is `side_effect`, not `side_effects` — the typo merely set
    # an unused attribute, so the mock returned the same object on every call and
    # the caching assertion below could never fail. With side_effect=[1, 2] the
    # constructor yields 1 then 2, so equality holds only if the result is cached.
    jvm.java.io.ByteArrayOutputStream = Mock(side_effect=[1, 2])
    baos.jvm = jvm
    jvm_obj1 = baos.jvm_obj
    jvm_obj2 = baos.jvm_obj
    self.assertEqual(jvm_obj1, jvm_obj2)
def test_output(self):
    # Run two uniqueness checks and compare the Markdown report byte-for-byte.
    check = Check(self.df).hasUniqueKey("_1").hasUniqueKey("_1", "_2")
    baos = ByteArrayOutputStream()
    reporter = MarkdownReporter(baos)
    check.run([reporter])
    expected_output = """
**Checking [_1: bigint, _2: string]**

It has a total number of 2 columns and 3 rows.

- *FAILURE*: Column _1 is not a key (1 non-unique tuple).
- *SUCCESS*: Columns _1, _2 are a key.
""".strip()
    self.assertEqual(baos.get_output(), expected_output)
def test_jvm_obj(self):
    """jvm_obj raises AttributeError before jvm is set and is cached afterwards."""
    jvm = Mock()
    baos = ByteArrayOutputStream()
    with self.assertRaises(AttributeError):
        jvm_obj = baos.jvm_obj
    # check that on the second call ByteArrayOutputStream returns the same jvm_obj
    # Fixed: the kwarg is `side_effect`, not `side_effects` — the typo merely set
    # an unused attribute, so the mock returned the same object on every call and
    # the caching assertion below could never fail. With side_effect=[1, 2] the
    # constructor yields 1 then 2, so equality holds only if the result is cached.
    jvm.java.io.ByteArrayOutputStream = Mock(side_effect=[1, 2])
    baos.jvm = jvm
    jvm_obj1 = baos.jvm_obj
    jvm_obj2 = baos.jvm_obj
    self.assertEqual(jvm_obj1, jvm_obj2)
def test_output(self):
    # Patch the reporter's field lookup so the ZeppelinReporter writes into our
    # ByteArrayOutputStream instead of Zeppelin's real output stream, then
    # compare the emitted HTML byte-for-byte.
    with patch("pyddq.reporters.get_field") as get_field:
        baos = ByteArrayOutputStream()
        baos.jvm = self.df._sc._jvm
        get_field.return_value = baos.jvm_obj
        check = Check(self.df).hasUniqueKey("_1").hasUniqueKey("_1", "_2")
        z = Mock()  # stands in for the Zeppelin context object
        reporter = ZeppelinReporter(z)
        check.run([reporter])
        expected_output = """
%html
</p>
<h4>Checking [_1: bigint, _2: string]</h4>
<h5>It has a total number of 2 columns and 3 rows.</h5>
<table>
<tr><td style="padding:3px">❌</td><td style="padding:3px">Column _1 is not a key (1 non-unique tuple).</td></tr>
<tr><td style="padding:3px">✅</td><td style="padding:3px">Columns _1, _2 are a key.</td></tr>
</table>
<p hidden>
""".strip()
        self.assertEqual(baos.get_output(), expected_output)
def run(self, reporters=None):
    """
    Runs check with all the previously specified constraints and report
    to every reporter passed as an argument

    Args:
        reporters (List[reporters.Reporter]): iterable of reporters to produce
            output on the check result. If not specified,
            reporters.ConsoleReporter is used

    Returns:
        None
    """
    # When no reporter is given, capture console output ourselves and print it.
    default_baos = None
    if not reporters:
        default_baos = ByteArrayOutputStream()
        reporters = [ConsoleReporter(default_baos)]
    scala_reporters = [r.get_jvm_reporter(self._jvm) for r in reporters]
    jvm_reporters = jc.iterable_to_scala_list(self._jvm, scala_reporters)
    self.jvmCheck.run(jvm_reporters)
    if default_baos:
        print(default_baos.get_output())
def run(self, reporters=None):
    """
    Runs check with all the previously specified constraints and report
    to every reporter passed as an argument

    Args:
        reporters (List[reporters.Reporter]): iterable of reporters to produce
            output on the check result. If not specified,
            reporters.ConsoleReporter is used

    Returns:
        None
    """
    # When no reporter is given, capture console output ourselves and print it.
    baos = None
    if not reporters:
        baos = ByteArrayOutputStream()
        reporters = [ConsoleReporter(baos)]
    jvm_reporters = jc.iterable_to_scala_list(
        self._jvm,
        [reporter.get_jvm_reporter(self._jvm) for reporter in reporters]
    )
    self.jvmCheck.run(jvm_reporters)
    if baos:
        # Fixed: `print baos.get_output()` was a Python 2 print statement —
        # a SyntaxError under Python 3 and inconsistent with the sibling
        # implementation, which already uses the print() function.
        print(baos.get_output())
def setUp(self):
    """Create a local 4-core Spark session and a markdown reporter fixture."""
    builder = SparkSession.builder.appName("Testing").master("local[4]")
    self.spark = builder.getOrCreate()
    self.reporter = MarkdownReporter(ByteArrayOutputStream())
def setUp(self):
    """Create a SparkContext/SQLContext pair and a markdown reporter fixture."""
    context = SparkContext()
    self.sc = context
    self.sqlContext = SQLContext(context)
    self.reporter = MarkdownReporter(ByteArrayOutputStream())