def test_merge_tables_duplicate_column_names():
    """
    Confirm tables can be merged with overlapping column names, as long as the
    overlapping names are not included in the list of columns to retain.

    Both input tables carry a 'dupe' column. Merging them with no column filter
    is expected to raise a ValueError; restricting the merge to ``['value']`` is
    expected to succeed and yield only 'household_id' and 'value'.

    """
    d = {
        'building_id': [1, 2, 3, 4],
        'value': [4, 4, 4, 4],
        'dupe': [1, 1, 1, 1],
    }
    buildings = pd.DataFrame(d).set_index('building_id')

    d = {
        'household_id': [1, 2, 3],
        'building_id': [2, 3, 4],
        'dupe': [1, 1, 1],
    }
    households = pd.DataFrame(d).set_index('household_id')

    # Duplicate columns should raise a ValueError. pytest.raises fails the test
    # by itself if nothing is raised, so no manual pytest.fail() is needed.
    with pytest.raises(ValueError) as excinfo:
        merge_tables([households, buildings])
    print(excinfo.value)

    # Excluding the duplicated name should make things ok
    merged = merge_tables([households, buildings], columns=['value'])
    assert sorted(all_cols(merged)) == sorted(['household_id', 'value'])
def test_all_cols_dataframe(df):
    """
    Check all_cols() when it is handed a DataFrame directly.

    The ``df`` argument is supplied from outside this function (presumably a
    pytest fixture -- confirm against the rest of the module). The reported
    names are expected to be 'id', 'val1' and 'val2', in any order.

    """
    expected = sorted(['id', 'val1', 'val2'])
    assert sorted(utils.all_cols(df)) == expected
def test_all_cols_orca(df):
    """
    Check all_cols() when it is handed the name of a registered Orca table.

    The ``df`` argument is registered with Orca under the name 'df', and that
    name (a string, not the DataFrame) is what gets passed to all_cols(). The
    reported names are expected to be 'id', 'val1' and 'val2', in any order.

    """
    table_name = 'df'
    orca.add_table(table_name, df)

    expected = sorted(['id', 'val1', 'val2'])
    assert sorted(utils.all_cols(table_name)) == expected
def test_all_cols_extras(df):
    """
    Confirm that all_cols() includes columns not part of the Orca core table.

    The ``df`` argument is registered as the Orca table 'df', then an extra
    column 'newcol' is attached to it through orca.add_column(). all_cols() is
    expected to report 'newcol' alongside 'id', 'val1' and 'val2'.

    """
    orca.add_table('df', df)

    # Only the column's name matters here, so an empty Series is enough. The
    # dtype is given explicitly because a bare pd.Series() triggers a
    # deprecation warning on pandas 1.x and its default dtype differs between
    # pandas versions.
    orca.add_column('df', 'newcol', pd.Series(dtype='float64'))

    cols = utils.all_cols('df')
    assert sorted(cols) == sorted(['id', 'val1', 'val2', 'newcol'])
def test_all_cols_unsupported_type(df):
    """
    Confirm that all_cols() raises an error for an unsupported type.

    A list wrapping the ``df`` argument is passed in place of a DataFrame or a
    table name; a ValueError is expected.

    """
    # pytest.raises fails the test by itself if no ValueError is raised, which
    # replaces the manual try / except / return / pytest.fail() sequence.
    with pytest.raises(ValueError) as excinfo:
        utils.all_cols([df])
    print(excinfo.value)
def test_merge_two_tables():
    """
    Merge a households table with a buildings table.

    The households table has a 'building_id' column whose name matches the
    index of the buildings table. The merged result is expected to expose
    'household_id', 'building_id' and 'value'.

    """
    buildings = pd.DataFrame({
        'building_id': [1, 2, 3, 4],
        'value': [4, 4, 4, 4],
    }).set_index('building_id')

    households = pd.DataFrame({
        'household_id': [1, 2, 3],
        'building_id': [2, 3, 4],
    }).set_index('household_id')

    merged = merge_tables([households, buildings])

    expected = ['household_id', 'building_id', 'value']
    assert sorted(all_cols(merged)) == sorted(expected)
def test_merge_three_tables_out_of_order():
    """
    Merge three tables where the second and third both hang off the first.

    The households table carries both a 'building_id' and a 'zone_id' column,
    matching the index names of the buildings and zones tables respectively;
    the buildings table has no 'zone_id' column of its own.

    """
    zones = pd.DataFrame({
        'zone_id': [1],
        'size': [1],
    }).set_index('zone_id')

    buildings = pd.DataFrame({
        'building_id': [1, 2, 3, 4],
        'height': [4, 4, 4, 4],
    }).set_index('building_id')

    households = pd.DataFrame({
        'household_id': [1, 2, 3],
        'building_id': [2, 3, 4],
        'zone_id': [1, 1, 1],
    }).set_index('household_id')

    merged = merge_tables([households, buildings, zones])

    expected = ['household_id', 'building_id', 'zone_id', 'height', 'size']
    assert sorted(all_cols(merged)) == sorted(expected)
def test_merge_three_tables():
    """
    Merge three tables that reference one another in sequence.

    The households table has a 'building_id' column matching the index of the
    buildings table, and the buildings table has a 'zone_id' column matching
    the index of the zones table.

    """
    zones = pd.DataFrame({
        'zone_id': [1],
        'size': [1],
    }).set_index('zone_id')

    buildings = pd.DataFrame({
        'building_id': [1, 2, 3, 4],
        'zone_id': [1, 1, 1, 1],
        'height': [4, 4, 4, 4],
    }).set_index('building_id')

    households = pd.DataFrame({
        'household_id': [1, 2, 3],
        'building_id': [2, 3, 4],
    }).set_index('household_id')

    merged = merge_tables([households, buildings, zones])

    expected = ['household_id', 'building_id', 'zone_id', 'height', 'size']
    assert sorted(all_cols(merged)) == sorted(expected)
def test_merge_tables_multiindex():
    """
    Merge onto a table that is indexed by more than one column.

    The units table is indexed by ('building_id', 'unit_id'); the households
    table carries columns with those same two names.

    """
    units = pd.DataFrame({
        'building_id': [1, 1, 2, 2],
        'unit_id': [1, 2, 1, 2],
        'value': [4, 4, 4, 4],
    }).set_index(['building_id', 'unit_id'])

    households = pd.DataFrame({
        'household_id': [1, 2, 3],
        'building_id': [1, 1, 2],
        'unit_id': [1, 2, 1],
    }).set_index('household_id')

    merged = merge_tables([households, units])

    expected = ['household_id', 'building_id', 'unit_id', 'value']
    assert sorted(all_cols(merged)) == sorted(expected)