def test_overlay(self):
    """Check how ``overlay`` columns render when converted with ``str``.

    Each column is built with a different mix of ``Column`` objects, column
    names, plain ints and ``lit`` values; the ``overlay(...)`` fragment is
    pulled out of its string form and compared with the expected rendering.
    """
    import re

    from pyspark.sql.functions import col, lit, overlay

    columns = (
        overlay(col("foo"), col("bar"), 1),
        overlay("x", "y", 3),
        overlay(col("x"), col("y"), 1, 3),
        overlay("x", "y", 2, 5),
        overlay("x", "y", lit(11)),
        overlay("x", "y", lit(2), lit(5)),
    )

    # Collect every "overlay(...)" fragment, keeping the order of `columns`.
    rendered = []
    for column in columns:
        rendered.extend(re.findall("(overlay\\(.*\\))", str(column)))

    # Where no length argument is passed, the expected rendering shows -1.
    expected = [
        "overlay(foo, bar, 1, -1)",
        "overlay(x, y, 3, -1)",
        "overlay(x, y, 1, 3)",
        "overlay(x, y, 2, 5)",
        "overlay(x, y, 11, -1)",
        "overlay(x, y, 2, 5)",
    ]
    self.assertListEqual(rendered, expected)
def test_overlay(self):
    """Verify ``overlay`` both as a rendered expression and as an executed query.

    The first half extracts the ``overlay(...)`` fragment from ``str(column)``
    for several argument styles (``Column`` objects, column names, plain ints,
    ``lit`` values) and compares it with the expected rendering.

    The second half runs ``overlay`` against a one-row DataFrame, passing the
    position and length as ints, as ``lit`` columns, and as column names, and
    checks that every form collects the same row.
    """
    from pyspark.sql.functions import col, lit, overlay
    from itertools import chain
    import re

    actual = list(
        chain.from_iterable(
            re.findall("(overlay\\(.*\\))", str(x))
            for x in [
                overlay(col("foo"), col("bar"), 1),
                overlay("x", "y", 3),
                overlay(col("x"), col("y"), 1, 3),
                overlay("x", "y", 2, 5),
                overlay("x", "y", lit(11)),
                overlay("x", "y", lit(2), lit(5)),
            ]
        )
    )

    # Where no length argument is passed, the expected rendering shows -1.
    expected = [
        "overlay(foo, bar, 1, -1)",
        "overlay(x, y, 3, -1)",
        "overlay(x, y, 1, 3)",
        "overlay(x, y, 2, 5)",
        "overlay(x, y, 11, -1)",
        "overlay(x, y, 2, 5)",
    ]
    self.assertListEqual(actual, expected)

    df = self.spark.createDataFrame([("SPARK_SQL", "CORE", 7, 0)], ("x", "y", "pos", "len"))

    # NOTE(review): `Row` is not imported in this method; it is presumably
    # imported at module level (pyspark.sql.Row) - confirm.
    exp = [Row(ol="SPARK_CORESQL")]

    cases = {
        "int literals": overlay(df.x, df.y, 7, 0),
        "lit columns": overlay(df.x, df.y, lit(7), lit(0)),
        "column names": overlay("x", "y", "pos", "len"),
    }
    # One assertion per call form so a failure names the offending form and
    # shows the actual rows, instead of a bare "False is not true".
    for label, column in cases.items():
        with self.subTest(arguments=label):
            self.assertEqual(df.select(column.alias("ol")).collect(), exp)