Пример #1
0
def test_merge_tables_duplicate_column_names():
    """
    Confirm tables can be merged with overlapping column names, as long as they're not
    included in the list of columns to retain.

    Both input tables carry a 'dupe' column. Merging them without a column filter is
    expected to raise a ValueError; merging with ``columns=['value']`` is expected to
    succeed.

    """
    d = {
        'building_id': [1, 2, 3, 4],
        'value': [4, 4, 4, 4],
        'dupe': [1, 1, 1, 1]
    }
    buildings = pd.DataFrame(d).set_index('building_id')

    d = {
        'household_id': [1, 2, 3],
        'building_id': [2, 3, 4],
        'dupe': [1, 1, 1]
    }
    households = pd.DataFrame(d).set_index('household_id')

    # Duplicate columns should raise a ValueError. pytest.raises fails the test with
    # a descriptive message if nothing is raised, so no manual pytest.fail() is needed.
    with pytest.raises(ValueError) as excinfo:
        merge_tables([households, buildings])
    # Keep the error text visible in the captured output, as before
    print(excinfo.value)

    # Excluding the duplicated name should make things ok
    merged = merge_tables([households, buildings], columns=['value'])
    assert sorted(all_cols(merged)) == sorted(['household_id', 'value'])
Пример #2
0
def test_merge_two_tables():
    """
    Merge a buildings table onto a households table.

    The names reported by ``all_cols`` for the result should be the households index
    plus 'building_id' and 'value'.

    """
    building_data = {
        'building_id': [1, 2, 3, 4],
        'value': [4, 4, 4, 4],
    }
    household_data = {
        'household_id': [1, 2, 3],
        'building_id': [2, 3, 4],
    }
    buildings = pd.DataFrame(building_data).set_index('building_id')
    households = pd.DataFrame(household_data).set_index('household_id')

    result = merge_tables([households, buildings])

    # Compare as sorted lists so the check is independent of column order
    expected = ['household_id', 'building_id', 'value']
    assert sorted(all_cols(result)) == sorted(expected)
Пример #3
0
def test_merge_three_tables_out_of_order():
    """
    Merge three tables, where the second and third are each merged onto the first.

    The households table carries both 'building_id' and 'zone_id', while the buildings
    table has no 'zone_id' column of its own.

    """
    households = pd.DataFrame({
        'household_id': [1, 2, 3],
        'building_id': [2, 3, 4],
        'zone_id': [1, 1, 1],
    }).set_index('household_id')

    buildings = pd.DataFrame({
        'building_id': [1, 2, 3, 4],
        'height': [4, 4, 4, 4],
    }).set_index('building_id')

    zones = pd.DataFrame({'zone_id': [1], 'size': [1]}).set_index('zone_id')

    result = merge_tables([households, buildings, zones])

    # Compare as sorted lists so the check is independent of column order
    expected = ['household_id', 'building_id', 'zone_id', 'height', 'size']
    assert sorted(all_cols(result)) == sorted(expected)
Пример #4
0
def test_merge_three_tables():
    """
    Merge three tables.

    Households reference buildings via 'building_id', and buildings reference zones
    via 'zone_id'; households have no 'zone_id' column of their own.

    """
    households = pd.DataFrame({
        'household_id': [1, 2, 3],
        'building_id': [2, 3, 4],
    }).set_index('household_id')

    buildings = pd.DataFrame({
        'building_id': [1, 2, 3, 4],
        'zone_id': [1, 1, 1, 1],
        'height': [4, 4, 4, 4],
    }).set_index('building_id')

    zones = pd.DataFrame({'zone_id': [1], 'size': [1]}).set_index('zone_id')

    result = merge_tables([households, buildings, zones])

    # Compare as sorted lists so the check is independent of column order
    expected = ['household_id', 'building_id', 'zone_id', 'height', 'size']
    assert sorted(all_cols(result)) == sorted(expected)
Пример #5
0
def test_merge_tables_multiindex():
    """
    Merge tables where the source table has a multi-index.

    The units table is indexed by ('building_id', 'unit_id'); the households table
    holds both of those identifiers as ordinary columns.

    """
    unit_index = ['building_id', 'unit_id']
    unit_data = {
        'building_id': [1, 1, 2, 2],
        'unit_id': [1, 2, 1, 2],
        'value': [4, 4, 4, 4],
    }
    units = pd.DataFrame(unit_data).set_index(unit_index)

    household_data = {
        'household_id': [1, 2, 3],
        'building_id': [1, 1, 2],
        'unit_id': [1, 2, 1],
    }
    households = pd.DataFrame(household_data).set_index('household_id')

    result = merge_tables([households, units])

    # Compare as sorted lists so the check is independent of column order
    expected = ['household_id', 'building_id', 'unit_id', 'value']
    assert sorted(all_cols(result)) == sorted(expected)
Пример #6
0
def test_merge_tables_missing_values():
    """
    If the target table includes identifiers not found in the source table, missing
    values should be inserted, changing the data type.

    The third household points at (building_id=3, unit_id=1), which is absent from
    the units table, so its merged 'value' is expected to be missing and the column
    is expected to be upcast from int64 to float64.

    """
    d = {
        'building_id': [1, 1, 2, 2],
        'unit_id': [1, 2, 1, 2],
        'value': [4, 4, 4, 4]
    }
    units = pd.DataFrame(d).set_index(['building_id', 'unit_id'])

    d = {
        'household_id': [1, 2, 3],
        'building_id': [1, 1, 3],
        'unit_id': [1, 2, 1]
    }
    households = pd.DataFrame(d).set_index('household_id')

    merged = merge_tables([households, units])

    # Check the 'value' column itself. The previous check used `merged.values`, which
    # is the ndarray of the whole frame, so its dtype also depends on the other columns.
    # Bracket access avoids the `.value` / `.values` attribute mix-up.
    assert units['value'].dtype == 'int64'
    assert merged['value'].dtype == 'float64'