def test_parquet_to_csv_overwrite(self):
    """Convert a parquet copy to CSV in 'overwrite' mode and compare the rows.

    The parquet fixture located via ``self.kwargs.get('parquet')`` is copied
    to ``self.parquet_copy``, converted to ``self.csv_copy`` by a
    ``Converter``, and the written CSV is read back (with a header row) so
    its RDD can be compared against the converter's input DataFrame.
    """
    copytree(self.kwargs.get('parquet'), self.parquet_copy)
    ct = Converter(input=self.parquet_copy, output=self.csv_copy, mode='overwrite')
    try:
        ct.write()
        df_in = ct.df
        df_out = ct.sqlCtx.read.csv(self.csv_copy, header=True)
        # NOTE(review): assertRDDEquals is defined outside this view; the
        # assertTrue wrapper only makes sense if it returns a truthy value
        # on success -- confirm.
        self.assertTrue(self.assertRDDEquals(df_in.rdd, df_out.rdd))
    finally:
        # Always tear the converter down, even when the write or the
        # assertion above fails, so this test cannot skip its cleanup.
        ct.tearDown()
def test_csv_to_parquet_overwrite(self):
    """Convert a CSV copy to parquet in 'overwrite' mode and compare the rows.

    The CSV fixture located via ``self.kwargs.get('csv')`` is copied to
    ``self.csv_copy``, converted to ``self.parquet_copy`` by a ``Converter``,
    and the written output is loaded back using the converter's
    ``out_format`` so its RDD can be compared against the converter's input
    DataFrame.
    """
    copyfile(self.kwargs.get('csv'), self.csv_copy)
    ct = Converter(input=self.csv_copy, output=self.parquet_copy, mode='overwrite')
    try:
        ct.write()
        df_in = ct.df
        df_out = ct.sqlCtx.read.format(ct.out_format).load(self.parquet_copy)
        # NOTE(review): assertRDDEquals is defined outside this view; the
        # assertTrue wrapper only makes sense if it returns a truthy value
        # on success -- confirm.
        self.assertTrue(self.assertRDDEquals(df_in.rdd, df_out.rdd))
    finally:
        # Always tear the converter down, even when the write or the
        # assertion above fails, so this test cannot skip its cleanup.
        ct.tearDown()