Exemplo n.º 1
0
def test_big_data():
    """Request one variable repeated 50 times plus ``NAME`` via ``data``.

    The returned table must expose exactly four columns: the variable
    itself, ``NAME``, and the ``county`` and ``state`` identifiers.
    """
    # NOTE(review): the repetition presumably exercises a per-request
    # variable limit in the client -- confirm against the ACS class.
    requested = ["B01001_002E"] * 50 + ['NAME']
    client = ACS()

    table = client.data(requested, 'county', 'state:01')

    n_columns = table.shape[1]
    assert n_columns == 4
    assert set(table.columns) == {'B01001_002E', 'NAME', 'county', 'state'}
Exemplo n.º 2
0
def test_big_query():
    """Run the raw ``_query`` with one variable repeated 50 times.

    Checks that at least one row comes back and that the first row
    holds 52 entries.
    """
    client = ACS()
    repeated = ["B01001_002E"] * 50

    rows = client._query(repeated, 'county', 'state:01')

    assert len(rows) > 0
    # NOTE(review): 52 is presumably the 50 requested values plus the
    # state and county fields -- confirm against the API response.
    first_row = rows[0]
    assert len(first_row) == 52
Exemplo n.º 3
0
def test_simple_query():
    """Run the raw ``_query`` for three variables at county level.

    Checks that at least one row comes back and that the first row
    holds five entries.
    """
    client = ACS()
    requested = ["NAME", "B01001_002E", "B01001_026E"]

    rows = client._query(requested, 'county', 'state:01')

    assert len(rows) > 0
    # NOTE(review): 5 is presumably the 3 requested variables plus the
    # state and county fields -- confirm against the API response.
    first_row = rows[0]
    assert len(first_row) == 5
Exemplo n.º 4
0
def test_hierarchic_inside():
    """Fetch tract-level data restricted by both a state and a county.

    The resulting columns must be the requested variables plus the
    ``tract``, ``state`` and ``county`` identifiers.
    """
    client = ACS()
    requested = ["NAME", "B01001_002E", "B01001_026E"]

    table = client.data(requested, 'tract', 'state:01 county:001')

    observed = set(table.columns)
    wanted = {'tract', 'state', 'county'}.union(requested)
    assert observed == wanted
Exemplo n.º 5
0
def test_simple_data():
    """Fetch three variables at county level and check the columns.

    The table must have five columns: the three requested variables
    plus the ``county`` and ``state`` identifiers.
    """
    client = ACS()
    requested = ["NAME", "B01001_002E", "B01001_026E"]

    table = client.data(requested, 'county', 'state:01')

    observed = set(table.columns)
    assert table.shape[1] == 5
    assert observed == {
        'B01001_002E',
        'B01001_026E',
        'NAME',
        'county',
        'state',
    }
Exemplo n.º 6
0
def test_simple_data_dictvariables():
    """Fetch data with variables given as a code-to-label mapping.

    The resulting columns must be the mapping's values (the labels)
    plus the ``state`` and ``county`` identifiers.
    """
    client = ACS()
    labels = {
        'NAME': 'Geography Name',
        'B01001_002E': 'Male Population',
        'B01001_026E': 'Female Population',
    }

    table = client.data(labels, 'county', 'state:01')

    observed = set(table.columns)
    wanted = {'state', 'county', *labels.values()}
    assert observed == wanted
Exemplo n.º 7
0
def test_dtypes():
    """Check which returned columns are numeric and which are objects.

    The two ``B01001_*`` variables must be selected by the ``'number'``
    dtype filter; ``NAME``, ``county`` and ``state`` by ``'object'``.
    """
    client = ACS()
    requested = ["NAME", "B01001_002E", "B01001_026E"]
    frame = client.data(requested, 'county', 'state:01')

    # Columns picked up by the numeric dtype filter.
    numeric_cols = set(frame.select_dtypes('number').columns)
    assert numeric_cols == {'B01001_002E', 'B01001_026E'}

    # Columns picked up by the object (string) dtype filter.
    object_cols = set(frame.select_dtypes('object').columns)
    assert object_cols == {'NAME', 'county', 'state'}
Exemplo n.º 8
0
def test_null_variables():
    """Check that an all-null variable triggers a ``UserWarning``.

    When we make a request with a variable not available at the
    desired level, the Census returns nulls, and the ACS class should
    generate a warning. The warning message must name the null
    variable only, and the result must still contain both columns,
    exactly one of which is entirely null.
    """
    acs = ACS()

    variables = {
        'B01001_001E': 'VARIABLE_OKAY',
        'B06009_001E': 'VARIABLE_NULLS',
    }
    geography = 'block group'
    inside = 'state:01 county:001'

    with pytest.warns(UserWarning) as record:
        result = acs.data(variables, geography, inside)

    # ``pytest.warns`` records every warning raised inside the block,
    # whatever its category, so ``record[0]`` could be an unrelated
    # warning emitted earlier. Select the first UserWarning explicitly;
    # ``pytest.warns`` has already guaranteed at least one exists.
    user_warnings = [
        w for w in record if issubclass(w.category, UserWarning)
    ]
    wmsg = user_warnings[0].message.args[0]
    assert 'B06009_001E' in wmsg
    assert 'B01001_001E' not in wmsg
    assert set(result.columns) >= set(variables.values())
    # exactly one column with all nulls
    assert result.isnull().all(axis=0).sum() == 1