Пример #1
0
def test_standardize_parking_bay(spark):
    """Check that standardizing the sample parking bay data yields valid rows.

    Reads ``./data/MelbParkingBayData.json`` as multi-line JSON, runs it
    through ``standardize.standardize_parking_bay`` with a fixed load id and
    the current timestamp, then verifies that the output is non-empty and
    contains no null ``bay_id`` values.

    Args:
        spark: Object exposing ``read.json`` -- presumably a SparkSession
            provided by a pytest fixture; confirm against the test config.
    """
    # Arrange
    source_df = spark.read.json(
        "./data/MelbParkingBayData.json", multiLine=True)
    batch_id = 1
    batch_time = datetime.datetime.now()

    # Act
    standardized_df = standardize.standardize_parking_bay(
        source_df, batch_id, batch_time)

    # Assert: the transform produced at least one row ...
    total_rows = standardized_df.count()
    assert total_rows != 0
    # ... and no row is missing its bay_id.
    rows_without_bay_id = standardized_df.filter(isnull("bay_id"))
    assert rows_without_bay_id.count() == 0
def test_standardize_parking_bay(spark):
    """Check the good/malformed split produced for the sample parking bay data.

    Reads ``./data/MelbParkingBayData.json`` as multi-line JSON using the
    ``in_parkingbay_schema`` schema, runs it through
    ``standardize.standardize_parking_bay`` with a fixed load id and the
    current timestamp, then verifies that the first returned DataFrame is
    non-empty, the second (malformed rows) is empty, and no row in the first
    has a null ``bay_id``.

    Args:
        spark: Object exposing ``read.json`` -- presumably a SparkSession
            provided by a pytest fixture; confirm against the test config.
    """
    # Arrange
    input_schema = standardize.get_schema("in_parkingbay_schema")
    source_df = spark.read.json(
        "./data/MelbParkingBayData.json",
        multiLine=True,
        schema=input_schema,
    )
    batch_id = 1
    batch_time = datetime.datetime.now()

    # Act
    good_df, malformed_df = standardize.standardize_parking_bay(
        source_df, batch_id, batch_time)

    # Assert
    good_row_count = good_df.count()
    assert good_row_count != 0
    malformed_row_count = malformed_df.count()
    assert malformed_row_count == 0
    rows_without_bay_id = good_df.filter(isnull("bay_id"))
    assert rows_without_bay_id.count() == 0
Пример #3
0
sensordata_schema = s.get_schema("in_sensordata_schema")

# Load both raw JSON inputs with explicit schemas. Each reader is given its
# own badRecordsPath folder under <base_path>/__corrupt.
parkingbay_sdf = (
    spark.read
    .schema(parkingbay_schema)
    .option("badRecordsPath", os.path.join(base_path, "__corrupt", "MelbParkingBayData"))
    .option("multiLine", True)
    .json(parkingbay_filepath)
)
sensordata_sdf = (
    spark.read
    .schema(sensordata_schema)
    .option("badRecordsPath", os.path.join(base_path, "__corrupt", "MelbParkingSensorData"))
    .option("multiLine", True)
    .json(sensors_filepath)
)

# Run the standardization step; each call returns a pair that is unpacked
# into a standardized DataFrame and a malformed-rows DataFrame.
t_parkingbay_sdf, t_parkingbay_malformed_sdf = s.standardize_parking_bay(
    parkingbay_sdf,
    load_id,
    loaded_on,
)
t_sensordata_sdf, t_sensordata_malformed_sdf = s.standardize_sensordata(
    sensordata_sdf,
    load_id,
    loaded_on,
)

# Append the standardized rows to the interim tables.
t_parkingbay_sdf.write.mode("append").insertInto("interim.parking_bay")
t_sensordata_sdf.write.mode("append").insertInto("interim.sensor")

# Append the malformed rows to their own tables.
t_parkingbay_malformed_sdf.write.mode("append").insertInto("malformed.parking_bay")
t_sensordata_malformed_sdf.write.mode("append").insertInto("malformed.sensor")

# COMMAND ----------

# MAGIC %md