Exemplo n.º 1
0
def run_extract(args):
    """Run ``extract.write_csv`` against the library resolved for ``args``.

    ``args.ref`` and ``args.remote`` are passed positionally after the
    library; ``args.exception`` and ``args.multi`` are forwarded as the
    ``raise_exc`` and ``multi`` keyword arguments.
    """
    from extract import write_csv

    write_csv(
        get_library(args),
        args.ref,
        args.remote,
        raise_exc=args.exception,
        multi=args.multi
    )
Exemplo n.º 2
0
    def test_basic(self):
        """Load one partition as a dataframe and print its ``dim()`` result.

        Smoke test only: nothing is asserted, so it fails only when one of
        the library, partition or dataframe calls raises.
        """
        from ambry import get_library

        library = get_library()
        partition = library.partition('census.gov-acs-p5ye2014-b01001')
        df = partition.dataframe()

        # Single-argument call form: prints the same text under Python 2 and
        # is valid syntax under Python 3, unlike the bare print statement.
        print(df.dim())
Exemplo n.º 3
0
    def library(self):
        """Return the test library, creating it first when it does not exist.

        The library is opened with ``root=self.test_root`` and a SQLite DSN
        template. Its database DSN is printed before the library is returned.
        """
        from ambry import get_library

        # The '{root}' placeholder is passed through literally; presumably
        # get_library substitutes the root path -- confirm against ambry.
        lib = get_library(root=self.test_root, db='sqlite:///{root}/library.db')

        if not lib.exists:
            lib.create()

        # Single-argument call form: same output on Python 2, valid syntax on
        # Python 3 (the bare print statement is not).
        print('Library: {}'.format(lib.database.dsn))

        return lib
Exemplo n.º 4
0
    def test_dump_all(self):
        """Print every column name of one partition with its ``classify`` result.

        The ``id`` column and columns whose name ends in ``_m90`` are skipped.
        Nothing is asserted; the test fails only when a call raises.
        """
        from ambry import get_library
        from censuslib.dimensions import classify

        library = get_library()
        partition = library.partition('census.gov-acs-p5ye2014-b01001')

        for column in partition.table.columns:
            if column.name == 'id' or column.name.endswith('_m90'):
                continue

            # Format into one string and use the single-argument call form:
            # this prints "name classification" on both Python 2 and 3,
            # whereas the two-item print statement is Python 2 only.
            print('%s %s' % (column.name, classify(column)))
Exemplo n.º 5
0
def write_partition_csv_mp(args):
    """Unpack one packed work item and run ``write_partition_csv`` on it.

    ``args`` is a 4-item sequence ``(remote_name, b_ref, p_ref, raise_exc)``.
    The remote, bundle and partition are looked up in a library obtained by
    calling ``get_library()`` inside this function. Any ``Exception`` raised
    by the write is logged on the library's logger, and re-raised only when
    ``raise_exc`` is true.
    """
    from ambry import get_library
    remote_name, bundle_ref, partition_ref, reraise = args

    library = get_library()
    # The lookups stay outside the try block so that only failures of the
    # write itself are logged and optionally swallowed.
    target_remote = library.remote(remote_name)
    bundle = library.bundle(bundle_ref)
    partition = library.partition(partition_ref)

    try:
        write_partition_csv(library, target_remote, bundle, partition)
    except Exception as exc:
        library.logger.error(exc)
        if not reraise:
            return
        raise
Exemplo n.º 6
0
def _generate_names():
    """Pretty-print a mapping of state and county names.

    Run from a shell with::

        python -c 'import geoid; geoid._generate_names()'

    County rows are keyed by ``(state, county)``. State rows are included
    only when their ``component`` is ``'00'`` and are keyed by ``(state, 0)``.
    """

    from ambry import get_library

    library = get_library()

    county_part = library.partition('census.gov-acs-geofile-2009-geofile50-20095-50')
    state_part = library.partition('census.gov-acs-geofile-2009-geofile40-20095-40')

    names = {
        (rec.state, rec.county): rec.name
        for rec in county_part.remote_datafile.reader
    }

    names.update(
        ((rec.state, 0), rec.name)
        for rec in state_part.remote_datafile.reader
        if rec.component == '00'
    )

    pprint.pprint(names)
Exemplo n.º 7
0
def get_library(args):
    """Return the result of ``ambry.get_library()`` called with no arguments.

    ``args`` is accepted but not used.
    """
    import ambry

    return ambry.get_library()
Exemplo n.º 8
0
#!/usr/bin/env python
#
# Create a Metatab file for the 2014 American Community Survey, 5 year release.
# This program requires Ambry, along with several Ambry datasets.
#

from ambry import get_library
from metatab.doc import MetatabDoc
from collections import defaultdict

# Open the library and look up the schema partitions this script reads.
l = get_library()
table_meta_p = l.partition(
    'census.gov-acs_geofile-schemas-2009e-table_meta-2014-5')
column_meta_p = l.partition(
    'census.gov-acs_geofile-schemas-2009e-column_meta-2014-5')

sequence_p = l.partition(
    'census.gov-acs_geofile-schemas-2009e-table_sequence-2014-5')
# Map each table_id to (sequence_number, start, table_cells), keeping only
# the rows whose `start` is truthy.
sequences = {
    row.table_id: (row.sequence_number, row.start, row.table_cells)
    for row in sequence_p if row.start
}

# Create the Metatab document and fill in the terms of its 'Root' section.
root_doc = MetatabDoc()
root = root_doc.new_section('Root')

root.new_term('Declare', 'http://assets.metatab.org/census.csv')
# NOTE(review): the 2014 ACS 5-year release is normally described as covering
# 2010-2014; confirm whether '2009-2014' in the title is intended.
root.new_term('Title', 'American Community Survey, 5 Year, 2009-2014')
root.new_term('Release', 5)
root.new_term('Year', 2014)
root.new_term('Include', 'acs20145-sources.csv')
Exemplo n.º 9
0
)

# Work around SSL hostname matching for S3 bucket names that contain dots.
import ssl

# ssl.match_hostname was deprecated in Python 3.7 and removed in 3.12, so look
# it up defensively instead of failing with AttributeError at import time.
_old_match_hostname = getattr(ssl, 'match_hostname', None)


def _new_match_hostname(cert, hostname):
    """Match ``hostname`` against ``cert`` after collapsing S3 bucket dots.

    For hosts ending in '.s3.amazonaws.com', the dots in the part before that
    suffix are removed before delegating to the original
    ``ssl.match_hostname``. Any other hostname is passed through unchanged.
    Returns whatever the original matcher returns and lets its exceptions
    propagate.
    """
    suffix = '.s3.amazonaws.com'
    if hostname.endswith(suffix):
        # Split at the trailing suffix that was just verified, rather than
        # searching the string again for its first occurrence.
        prefix = hostname[:-len(suffix)]
        hostname = prefix.replace('.', '') + suffix
    return _old_match_hostname(cert, hostname)


# Install the wrapper only when there is an original matcher to wrap.
if _old_match_hostname is not None:
    ssl.match_hostname = _new_match_hostname

# Open the library and fetch the bundle whose name is built from `release`
# and `year` (both are defined earlier in the script, outside this excerpt).
l = ambry.get_library()
b = l.bundle('census.gov-acs-p{}ye{}'.format(release, year))


# Break up the rows we are storing to reduce memory usage.
# Each inner list is one group of summary-level codes.
sumlevel_groups = [
    [40,50,60,160,400,950,960,970,610,620,500],
    [140], # Tracts
    [150]  # Blockgroups
]
    

def write_rows(sumlevel, table_name, p, rows):
    
        file_name = "{}/{}/{}/{}.csv".format(year, release, table_name, sumlevel)