Example #1
 def test_extended_string_to_long(self, spark_session, input_value,
                                  expected_value):
     # Build a single-row DataFrame with the raw value in column "input_key".
     input_df = self.create_input_df(input_value, spark_session)
     output_df = Mapper(
         mapping=[("output_key", "input_key", "extended_string_to_long")]
     ).transform(input_df)
     # The value must be parsed correctly and the column cast to LongType.
     assert output_df.first().output_key == expected_value
     assert isinstance(output_df.schema["output_key"].dataType, T.LongType)
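The create_input_df helper and the parametrized arguments come from the surrounding test class and are not shown on this page. A minimal sketch of what they presumably look like, assuming a single nullable StringType input column (the helper body and sample values are assumptions, not taken from the source):

 import datetime

 import pytest
 from pyspark.sql import Row
 from pyspark.sql import types as T
 from spooq.transformer import Mapper  # assumed import path

 # Hypothetical reconstruction of the helper (a method on the test class):
 def create_input_df(self, input_value, spark_session):
     return spark_session.createDataFrame(
         [Row(input_key=input_value)],
         schema=T.StructType(
             [T.StructField("input_key", T.StringType(), True)]))

 # Illustrative parametrization for test_extended_string_to_long:
 # @pytest.mark.parametrize(("input_value", "expected_value"),
 #                          [("123456", 123456), (None, None)])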
Example #2
 def test_extended_string_unix_timestamp_ms_to_date_spark2(
         self, spark_session, input_value, expected_value):
     input_df = self.create_input_df(input_value, spark_session)
     # Interpret the string as a unix timestamp in milliseconds and
     # truncate it to a calendar date.
     output_df = Mapper(
         mapping=[("output_key", "input_key",
                   "extended_string_unix_timestamp_ms_to_date")]
     ).transform(input_df)
     actual_value = output_df.first().output_key
     assert actual_value == expected_value
     assert isinstance(output_df.schema["output_key"].dataType, T.DateType)
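An illustrative parametrization for this conversion (sample values assumed, not from the source):

 # 1545696000000 ms since the epoch is 2018-12-25 00:00:00 UTC.
 @pytest.mark.parametrize(("input_value", "expected_value"), [
     ("1545696000000", datetime.date(2018, 12, 25)),
     (None, None),
 ])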
Example #3
 def test_extended_string_to_float(self, spark_session, input_value,
                                   expected_value):
     input_df = self.create_input_df(input_value, spark_session)
     output_df = Mapper(
         mapping=[("output_key", "input_key", "extended_string_to_float")]
     ).transform(input_df)
     actual_value = output_df.first().output_key
     if actual_value is not None:
         # FloatType is 32 bit, so compare with a tolerance.
         assert pytest.approx(actual_value) == expected_value
     else:
         assert actual_value == expected_value
     assert isinstance(output_df.schema["output_key"].dataType, T.FloatType)
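pytest.approx is needed here because FloatType is a 32-bit float: casting a decimal string loses precision relative to Python's 64-bit floats, so an exact comparison would fail. An illustrative parametrization (sample values assumed, not from the source):

 @pytest.mark.parametrize(("input_value", "expected_value"), [
     ("123.456", 123.456),    # only approximately representable in 32 bit
     ("not_a_number", None),  # assumed: unparsable strings are mapped to null
     (None, None),
 ])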
Example #4
 def test_spark_sql_object(self, spark_session, input_value_1,
                           input_value_2, mapper_function, expected_value):
     input_df = self.create_input_df(input_value_1, input_value_2,
                                     spark_session)
     # "as_is" passes the Spark column object in mapper_function straight
     # through instead of looking up a source column by name.
     output_df = Mapper(
         mapping=[("output_key", mapper_function, "as_is")]
     ).transform(input_df)
     actual = output_df.first().output_key
     if isinstance(expected_value, datetime.datetime):
         # Timestamps generated at runtime can only be checked against a
         # tolerance window, not for equality.
         assert (expected_value - datetime.timedelta(seconds=30)
                 < actual < datetime.datetime.now())
     else:
         assert actual == expected_value
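Because "as_is" accepts an arbitrary Spark column object, the parametrization can feed in computed expressions. A plausible case explaining the 30-second tolerance window above (an assumption, not from the source):

 import datetime

 import pyspark.sql.functions as F

 # Hypothetical parametrized case: a timestamp computed at execution time
 # can only be compared against a window around "now".
 example_case = (F.current_timestamp(), datetime.datetime.now())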
Example #5
 def test_extended_string_to_date_spark2(self, spark_session, input_value,
                                         expected_value):
     input_df = self.create_input_df(input_value, spark_session)
     output_df = Mapper(
         mapping=[("output_key", "input_key", "extended_string_to_date")]
     ).transform(input_df)
     try:
         actual_value = output_df.first().output_key
     except ValueError:
         # A millisecond epoch read as seconds yields a year beyond
         # datetime.MAXYEAR: Spark stores the value in the DataFrame, but
         # collecting it into a Python date raises ValueError.
         actual_value = "out_of_range_for_python"
     assert actual_value == expected_value
     assert isinstance(output_df.schema["output_key"].dataType, T.DateType)
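The ValueError branch exists because Python dates only reach year 9999: a millisecond epoch mistakenly parsed as seconds lands far beyond that, so Spark can hold the value while .first() cannot materialize it. An illustrative parametrization (sample values assumed, not from the source):

 @pytest.mark.parametrize(("input_value", "expected_value"), [
     ("2018-12-25", datetime.date(2018, 12, 25)),
     ("1545696000", datetime.date(2018, 12, 25)),   # unix seconds
     ("1545696000000", "out_of_range_for_python"),  # ms parsed as seconds
 ])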
Example #6
 def test_generate_select_expression_for_unix_timestamp_ms_to_spark_timestamp(
         self, input_value, spark_session):
     input_df = spark_session.createDataFrame(
         [Row(input_column=input_value)],
         schema=T.StructType(
             [T.StructField("input_column", T.LongType(), True)]))
     output_df = Mapper(
         mapping=[("output_column", "input_column",
                   "unix_timestamp_ms_to_spark_timestamp")]
     ).transform(input_df)
     expected_value = datetime.datetime.fromtimestamp(input_value / 1000.0)
     assert output_df.first().output_column == expected_value, \
         "Processing of column value"
     assert output_df.schema.fieldNames() == ["output_column"], \
         "Renaming of column"
     assert output_df.schema["output_column"].dataType.typeName() == \
         "timestamp", "Casting of column"
Example #7
 def test_generate_select_expression_for_meters_to_cm(
         self, input_value, expected_value, spark_session):
     input_df = spark_session.createDataFrame(
         data=[Row(input_key=input_value)],
         schema=T.StructType([
             T.StructField("input_key", get_spark_data_type(input_value),
                           True)]))
     output_df = Mapper(
         mapping=[("output_column", "input_key", "meters_to_cm")]
     ).transform(input_df)
     assert output_df.first().output_column == expected_value, \
         "Processing of column value"
     assert output_df.schema.fieldNames() == ["output_column"], \
         "Renaming of column"
     assert output_df.schema["output_column"].dataType.typeName() == \
         "integer", "Casting of column"