from pathlib import Path

import pendulum
from sqlalchemy import and_, select

# Fact, engine, consolidate_callable, and TaskInstanceMock come from the
# project's ETL modules; `ingest` and `clean_etl` are pytest fixtures
# defined in conftest.py.


def test_consolidate(ingest, clean_etl):
    date = pendulum.from_format("2020_03_27", "YYYY_MM_DD").naive()
    # Ingest two pull files for the same file date (2020_03_27).
    for file_stem in ["2020_04_01_00_00_00-v2", "2020_03_27_00_00_00-v2"]:
        file_path = Path(f"tmp/raw/{file_stem}_2020_03_27.csv")
        ingest(file_path)
    table = Fact.child_or_load_table(date)

    # Hand the consolidation task its config via XCom, as the DAG's init
    # task would.
    task_instance = TaskInstanceMock("init")
    task_instance.xcom_push("config", {
        "date": str(date),
        "table_name": table.fullname,
    })
    clean_etl("consolidation", table.fullname, date)
    consolidate_callable(ti=task_instance)

    with engine.begin() as conn:
        # Consolidation leaves 198 rows in the child fact table.
        count = conn.execute(table.select()).rowcount
        assert count == 198
        # Every session has been closed: no NULL session_end remains.
        count = conn.execute(
            table.select(table.c.session_end == None)).rowcount
        assert count == 0
        # 101 rows are flagged as coming from the latest pull.
        count = conn.execute(table.select(table.c.pulltime_last)).rowcount
        assert count == 101
        # Exactly one latest-pull row was closed before the final pull.
        count = conn.execute(
            table.select(
                and_(table.c.pulltime_last,
                     table.c.session_end != table.c.pulltime))).rowcount
        assert count == 1
        # There is one latest-pull row per distinct session key.
        count1 = conn.execute(table.select(table.c.pulltime_last)).rowcount
        count2 = conn.execute(
            select(
                [
                    table.c.userid_key,
                    table.c.mac_key,
                    table.c.ap_key,
                    table.c.ssid_key,
                    table.c.protocol_key,
                    table.c.session_start,
                ],
                distinct=True,
            )).rowcount
        assert count1 == count2
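
# The sketch below illustrates what TaskInstanceMock is assumed to do:
# emulate just enough of Airflow's TaskInstance XCom API (xcom_push /
# xcom_pull) for consolidate_callable to read its config. The real helper
# lives elsewhere in the project's test utilities; this version, including
# its name, is illustrative only.
class TaskInstanceMockSketch:
    """In-memory stand-in for TaskInstance XCom storage (assumption)."""

    def __init__(self, task_id):
        self.task_id = task_id
        self._xcom = {}

    def xcom_push(self, key, value):
        # Store under (task_id, key), mirroring XCom's addressing.
        self._xcom[(self.task_id, key)] = value

    def xcom_pull(self, task_ids=None, key=None):
        return self._xcom.get((task_ids or self.task_id, key))
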
def test_ingest_preprocessed(ingest):
    file_stem = "2020_04_01_00_00_00-v2"
    file_path = Path(f"tmp/raw/{file_stem}_2020_03_27.csv")
    ingest(file_path)
    # The stem is "<pull timestamp>-v2_<file date>"; the first 23 characters
    # ("YYYY_MM_DD_HH_MM_SS-v2_") are sliced off to recover the file date.
    date = pendulum.from_format(file_path.stem[23:], "YYYY_MM_DD").naive()
    child_fact = Fact.child_or_load_table(date)
    with engine.begin() as conn:
        # All 104 ingested rows land in the child fact table...
        count = conn.execute(child_fact.select()).rowcount
        assert count == 104
        # ...and every one of them is still open (NULL session_end)
        # until consolidation runs.
        count = conn.execute(
            child_fact.select(child_fact.c.session_end == None)).rowcount
        assert count == 104
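
# A minimal sketch of the `ingest` fixture both tests request, assuming it
# materialises the raw CSV from checked-in test data and then invokes the
# project's ingest step. The real fixture lives in conftest.py; `run_ingest`
# and the tests/data directory are assumptions, not the project's API.
import shutil

import pytest


@pytest.fixture
def ingest():
    def _ingest(file_path: Path):
        # Place the sample CSV where the pipeline expects raw input.
        file_path.parent.mkdir(parents=True, exist_ok=True)
        shutil.copy(Path("tests/data") / file_path.name, file_path)
        run_ingest(file_path)  # hypothetical project entry point

    return _ingest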