def test_group_transactions(spark):
    """Check that group_transactions exposes a ``transactions_grouped`` table.

    ``create_spark_views`` is called first (presumably to register the input
    views that ``group_transactions`` reads -- confirm against its
    definition). After ``group_transactions`` has run, the table must be
    listed by the SQL context and expose exactly the columns
    ``customer_id``, ``product_id`` and ``count``, in that order.
    """
    sql = SQLContext(spark)
    create_spark_views(
        spark, customers_location, products_location, transactions_location
    )

    group_transactions(spark)

    # Verify the table is registered before querying it, so a missing table
    # fails on this assertion instead of erroring inside spark.sql().
    assert "transactions_grouped" in sql.tableNames()

    columns = spark.sql("""SELECT * FROM transactions_grouped""").columns
    assert columns == ["customer_id", "product_id", "count"]
def test_create_spark_views(spark):
    """Check that create_spark_views registers the three expected tables.

    After the call, the tables visible through the SQL context must be
    exactly ``customers``, ``products`` and ``raw_transactions``.
    """
    sql = SQLContext(spark)
    create_spark_views(
        spark, customers_location, products_location, transactions_location
    )

    # Compare in sorted order: the test cares about which tables exist, not
    # about the order in which tableNames() happens to list them.
    expected = ['customers', 'products', 'raw_transactions']
    assert sorted(sql.tableNames()) == expected