def test_run(self):
    """Run the parser on the configured transformations and inspect the result.

    Element 1 of ``expanded_transformation`` must equal the string
    ``'dst_ip'``.  Elements 0, 2 and 3 must be ``FieldTransformation``
    objects whose ``name`` and ``body`` type agree with the matching entry
    of ``stub['run_test']``.
    """
    transformations = TransformationsParserConfig(
        CONFIG_PATH).content["processing"]["transformation"]
    parser = TransformationsParser(transformations)
    parser.run()
    expanded = parser.expanded_transformation

    # NOTE: a check that the expansion holds exactly 5 elements exists in the
    # original source only as a commented-out assertion; it stays disabled.
    self.assertEqual(
        expanded[1], 'dst_ip',
        "2 element in expanded transformation should be 'dst_ip'")

    for index in (0, 2, 3):
        actual = expanded[index]
        self.assertIsInstance(
            actual, FieldTransformation,
            "{} element expanded transformation should has FieldTransformation type"
            .format(index))

        expected = stub["run_test"][index]
        expected_name = expected["field_name"]
        self.assertEqual(
            actual.name, expected_name,
            "expanded_transformation[{}].field_name should be {}".format(
                index, expected_name))

        expected_type = expected["type"]
        self.assertIsInstance(
            actual.body, expected_type,
            'expanded_transformation[{}].operation should be instance of {}'
            .format(index, expected_type))
def test__parse_field(self):
    """Parsing a bare field name must return that same name as a ``str``."""
    field = "sample_rating"
    transformations = TransformationsParserConfig(
        CONFIG_PATH).content["processing"]["transformation"]
    parsed = TransformationsParser(transformations)._parse(field)

    self.assertIsInstance(parsed, str,
                          "Result should be instance of string")
    self.assertEqual(parsed, field,
                     "Value this leaf node should be 'sample_rating'")
def test__parse_raise_incorrect_expression_error(self):
    """``_parse`` must raise ``errors.IncorrectExpression`` for ``add((1,2)``.

    The expression has two opening parentheses but only one closing one.
    """
    settings = TransformationsParserConfig(CONFIG_PATH)
    parser = TransformationsParser(
        settings.content["processing"]["transformation"])

    # Callable form of assertRaises: invokes parser._parse("add((1,2)", True).
    self.assertRaises(errors.IncorrectExpression,
                      parser._parse, "add((1,2)", True)
def test_build_lambda_processor_config(self):
    """Build a lambda from a ``config(...)`` transformation and run it on Spark.

    The single transformation ``a: config('input.options.port')`` is parsed,
    validated and turned into a lambda by ``TransformationCreator``.  The
    lambda is mapped over the rows read from ``DATA_PATH`` and the collected
    result must be five one-element tuples ``(29092, )``
    (presumably the port value held in ``self.config`` -- verify against the
    fixture).

    The Spark session is stopped in a ``finally`` clause so it is released
    even when reading the data or an assertion fails.
    """
    parser = TransformationsParser(["a: config('input.options.port')"])
    parser.run()

    operations = TransformationOperations(self.config)
    validator = TransformationsValidator(operations, self.data_structure)
    # The return value is not needed; the call is kept as in the original
    # test, which also ran validation before building the lambda.
    validator.validate(parser.expanded_transformation)

    creator = TransformationCreator(self.data_structure,
                                    parser.expanded_transformation,
                                    TransformationOperations(self.config))
    transformation = creator.build_lambda()
    self.assertIsInstance(transformation, types.LambdaType,
                          "Transformation type should be lambda")

    spark = SparkSession.builder.getOrCreate()
    try:
        data_frame = spark.read.csv(DATA_PATH, self.data_structure_pyspark)
        result = data_frame.rdd.map(transformation).collect()
        self.assertListEqual(result, [(29092, )] * 5,
                             "List of tuples should be equal")
    finally:
        # Always release the session, including on assertion failure.
        spark.stop()
def test__parse_nested_operations(self):
    """Parse a nested expression and verify the shape of the syntax tree.

    The expression is ``minus(mult(...),mult(...,sum(...)))`` with operands
    taken from ``stub["first_mult"]`` and ``stub["second_mult"]``.  The root
    and every operation node must be a ``SyntaxTree`` with the expected
    operation name and child count, and every operand must be a ``str``
    equal to the corresponding stub value.
    """
    transformations = TransformationsParserConfig(
        CONFIG_PATH).content["processing"]["transformation"]
    parser = TransformationsParser(transformations)

    first_operands = stub["first_mult"]
    second_operands = stub["second_mult"]
    expression = "minus(mult({}),mult({},sum({})))".format(
        ",".join(first_operands),
        second_operands[0],
        ",".join(second_operands[1]))
    root = parser._parse(expression)

    # Root node: minus(<first mult>, <second mult>)
    self.assertIsInstance(root, SyntaxTree,
                          "Result should be instance of SyntaxTree")
    self.assertEqual(root.operation, "minus", "Operation should be 'minus'")
    self.assertEqual(len(root.children), 2, "Should have 2 children")

    # First child: mult over the two operands of stub["first_mult"]
    left = root.children[0]
    self.assertIsInstance(left, SyntaxTree,
                          "Result should be instance of SyntaxTree")
    self.assertEqual(left.operation, "mult", "Operation should be 'mult'")
    self.assertEqual(len(left.children), 2, "Should have 2 children")
    for index in range(2):
        operand = left.children[index]
        self.assertIsInstance(
            operand, str,
            "children[{}] should be instance of str".format(index))
        self.assertEqual(
            operand, first_operands[index],
            "Mult {} argument should be {}".format(
                index, first_operands[index]))

    # Second child: mult(<plain operand>, sum(...))
    right = root.children[1]
    self.assertIsInstance(right, SyntaxTree,
                          "Result should be instance of SyntaxTree")
    self.assertEqual(right.operation, "mult", "Operation should be 'mult'")
    self.assertEqual(len(right.children), 2, "Should have 2 children")

    # Its first argument is a plain operand ...
    head = right.children[0]
    self.assertIsInstance(
        head, str, "children[{}] should be instance of str".format(0))
    self.assertEqual(
        head, second_operands[0],
        "Mult {} argument should be {}".format(0, second_operands[0]))

    # ... and its second argument is the nested sum node.
    nested_sum = right.children[1]
    self.assertIsInstance(
        nested_sum, SyntaxTree,
        "children[{}] should be instance of SyntaxTree".format(1))
    self.assertEqual(nested_sum.operation, "sum",
                     "Operation should be 'sum'")
    self.assertEqual(len(nested_sum.children), 2, "Should have 2 children")
    for index in range(2):
        operand = nested_sum.children[index]
        self.assertIsInstance(
            operand, str,
            "children[{}] should be instance of str".format(index))
        self.assertEqual(
            operand, second_operands[1][index],
            "Sum {} argument should be {}".format(
                index, second_operands[1][index]))
def test__parse_simple_operation(self):
    """Parse a flat ``sum(...)`` call built from ``stub["sum"]``.

    The result must be a ``SyntaxTree`` with operation ``sum`` and three
    ``str`` children equal to the stub operands, in order.
    """
    transformations = TransformationsParserConfig(
        CONFIG_PATH).content["processing"]["transformation"]
    parser = TransformationsParser(transformations)

    operands = stub["sum"]
    tree = parser._parse("sum({})".format(",".join(operands)))

    self.assertIsInstance(tree, SyntaxTree,
                          "Result should be instance of SyntaxTree")
    self.assertEqual(tree.operation, "sum", "Operation should be 'sum'")
    self.assertEqual(len(tree.children), 3, "Should have 3 children")

    for index in range(3):
        child = tree.children[index]
        self.assertIsInstance(
            child, str,
            "children[{}] should be instance of Leaf".format(index))
        self.assertEqual(
            child, operands[index],
            "Sum {} argument should be {}".format(index, operands[index]))
def test__parse_config_operation(self):
    """Parse ``config(<stub["config"]>)`` and check the resulting node.

    The result must be a ``SyntaxTree`` with operation ``config`` and a
    single ``str`` child.
    """
    settings = TransformationsParserConfig(CONFIG_PATH)
    parser = TransformationsParser(
        settings.content["processing"]["transformation"])

    tree = parser._parse("config({})".format(stub["config"]), True)

    self.assertIsInstance(tree, SyntaxTree,
                          "Result should be instance of SyntaxTree")
    self.assertEqual(tree.operation, "config",
                     "Operation should be 'config'")
    self.assertEqual(len(tree.children), 1, "Should have 1 children")

    only_child = tree.children[0]
    self.assertIsInstance(
        only_child, str,
        "children[{}] should be instance of Leaf".format(0))
def test_build_lambda_processor_add(self):
    """Build a lambda from ``add`` transformations and run it on Spark.

    Four transformations are parsed: two ``add(-13.5, 2)`` calls (with and
    without spaces) and two quoted string literals that merely look like
    ``add`` calls, one of them containing an escaped quote.  After
    validation the lambda built by ``TransformationCreator`` is mapped over
    the rows read from ``DATA_PATH``; each of the five collected rows must
    equal ``(-11.5, -11.5, 'add(-13.5,2)', "add'(-13.5,2)")``.

    The Spark session is stopped in a ``finally`` clause so it is released
    even when reading the data or an assertion fails.
    """
    # Show the complete diff if the list comparison fails.
    self.maxDiff = None

    parser = TransformationsParser([
        "dst_ip: add(-13.5, 2)",
        "src_ip:add(-13.5,2)",
        "foobar: 'add(-13.5,2)'",
        "foobar2: 'add\\'(-13.5,2)'",
    ])
    parser.run()

    operations = TransformationOperations(self.config)
    validator = TransformationsValidator(operations, self.data_structure)
    # The return value is not needed; the call is kept as in the original
    # test, which also ran validation before building the lambda.
    validator.validate(parser.expanded_transformation)

    creator = TransformationCreator(self.data_structure,
                                    parser.expanded_transformation,
                                    TransformationOperations(self.config))
    transformation = creator.build_lambda()
    self.assertIsInstance(transformation, types.LambdaType,
                          "Transformation type should be lambda")

    expected_row = (-11.5, -11.5, 'add(-13.5,2)', "add'(-13.5,2)")

    spark = SparkSession.builder.getOrCreate()
    try:
        data_frame = spark.read.csv(DATA_PATH, self.data_structure_pyspark)
        result = data_frame.rdd.map(transformation).collect()
        self.assertListEqual(result, [expected_row] * 5,
                             "List of tuples should be equal")
    finally:
        # Always release the session, including on assertion failure.
        spark.stop()
def test__parse_transformation_types(self):
    """Check the Python type ``_parse`` assigns to each kind of literal.

    Every literal expression, parsed with ``True`` as the second argument,
    must yield a ``SyntaxTree`` whose first child has the expected Python
    type.  A bare identifier (``sample_rating``) must come back as a plain
    ``str`` rather than a ``SyntaxTree``.

    The failure messages name the offending expression and the type that
    was actually expected.
    """
    parser = TransformationsParser([])

    # (expression, expected type of the first child), in the original order.
    literal_cases = (
        ("1", int),
        ("1.0", float),
        ("False", bool),
        ("True", bool),
        ("'Fo,o'", str),          # comma inside a quoted string
        ("'Foo\\'Bar'", str),     # escaped single quote
        ("'Bar'", str),
        ("4E+8", float),          # exponent notation
        ("'Foo\\\"bar'", str),    # escaped double quote
    )
    for expression, expected_type in literal_cases:
        parsed = parser._parse(expression, True)
        self.assertIsInstance(
            parsed, SyntaxTree,
            "Parsing {!r} should produce a SyntaxTree".format(expression))
        self.assertIsInstance(
            parsed.children[0], expected_type,
            "Literal {!r} should be parsed as {}".format(
                expression, expected_type.__name__))

    parsed = parser._parse("sample_rating", True)
    self.assertIsInstance(parsed, str,
                          "Result should be an instance of str")