def run_extract(args):
    """CLI entry point: dump the selected partitions to CSV files.

    Args:
        args: parsed command-line namespace; reads .ref, .remote,
            .exception and .multi.
    """
    from extract import write_csv

    lib = get_library(args)
    # Honor the CLI flags: --exception re-raises worker errors,
    # --multi enables multiprocess extraction.
    write_csv(lib, args.ref, args.remote,
              raise_exc=args.exception, multi=args.multi)
def test_basic(self):
    """Smoke test: load a census partition and print its dataframe shape."""
    from ambry import get_library

    l = get_library()
    p = l.partition('census.gov-acs-p5ye2014-b01001')
    df = p.dataframe()
    # FIX: print-function call form so this also runs under Python 3;
    # for a single argument the output is identical under Python 2.
    print(df.dim())
def library(self):
    """Return the test library, creating its database on first use.

    Uses ``self.test_root`` as the library root; the ``{root}``
    placeholder in the DSN is expanded by Ambry, not here.
    """
    from ambry import get_library

    l = get_library(root=self.test_root, db='sqlite:///{root}/library.db')
    if not l.exists:
        l.create()
    # FIX: print-function call form keeps this compatible with both
    # Python 2 and Python 3 (identical output for a single argument).
    print('Library: {}'.format(l.database.dsn))
    return l
def test_dump_all(self):
    """Print the classification of every estimate column in the table.

    Skips the 'id' column and margin-of-error columns (names ending in
    '_m90'), printing '<name> <classification>' for the rest.
    """
    from ambry import get_library
    from censuslib.dimensions import classify

    l = get_library()
    p = l.partition('census.gov-acs-p5ye2014-b01001')
    for c in p.table.columns:
        if c.name == 'id':
            continue
        if not c.name.endswith('_m90'):
            # FIX: the Python-2-only `print a, b` statement would print a
            # tuple under Python 3; format a single space-separated string
            # instead, which matches the old output on both versions.
            print('{} {}'.format(c.name, classify(c)))
def write_partition_csv_mp(args):
    """Multiprocessing worker: write one partition to CSV on a remote.

    Args:
        args: tuple of (remote_name, bundle_ref, partition_ref, raise_exc),
            packed by the pool dispatcher.
    """
    from ambry import get_library

    (remote_name, b_ref, p_ref, raise_exc) = args

    library = get_library()
    remote = library.remote(remote_name)
    bundle = library.bundle(b_ref)
    partition = library.partition(p_ref)

    try:
        write_partition_csv(library, remote, bundle, partition)
    except Exception as exc:
        # Log and swallow by default so one bad partition doesn't kill the
        # whole pool; re-raise only when the caller asked for exceptions.
        library.logger.error(exc)
        if raise_exc:
            raise
def _generate_names():
    """Generate the state and county name lookup table.

    Prints a dict mapping (state, county) -> name; states use a county
    code of 0.

    >>> python -c 'import geoid; geoid._generate_names()'
    """
    from ambry import get_library

    lib = get_library()
    county_p = lib.partition('census.gov-acs-geofile-2009-geofile50-20095-50')
    state_p = lib.partition('census.gov-acs-geofile-2009-geofile40-20095-40')

    # Counties first, keyed by (state, county).
    names = {(row.state, row.county): row.name
             for row in county_p.remote_datafile.reader}

    # Then the whole-state rows ('00' component), keyed with county == 0.
    for row in state_p.remote_datafile.reader:
        if row.component == '00':
            names[(row.state, 0)] = row.name

    pprint.pprint(names)
def get_library(args):
    """CLI adapter: return the default Ambry library.

    The ``args`` namespace is accepted for signature compatibility with
    the other command handlers but is not used.
    """
    # Alias the import so it doesn't shadow this function's own name.
    from ambry import get_library as _get_library
    return _get_library()
#!/usr/bin/env python
#
# Create a Metatab file for the 2014 American Community Survey, 5 year
# release. This program requires Ambry, along with several Ambry datasets.

from ambry import get_library
from metatab.doc import MetatabDoc
from collections import defaultdict

l = get_library()

# Schema partitions describing the 2014 5-year release layout.
table_meta_p = l.partition(
    'census.gov-acs_geofile-schemas-2009e-table_meta-2014-5')
column_meta_p = l.partition(
    'census.gov-acs_geofile-schemas-2009e-column_meta-2014-5')
sequence_p = l.partition(
    'census.gov-acs_geofile-schemas-2009e-table_sequence-2014-5')

# Map table id -> (sequence number, start column, cell count), keeping
# only the rows that actually declare a start position.
sequences = {row.table_id: (row.sequence_number, row.start, row.table_cells)
             for row in sequence_p
             if row.start}

# Root section of the Metatab document.
root_doc = MetatabDoc()
root = root_doc.new_section('Root')
root.new_term('Declare', 'http://assets.metatab.org/census.csv')
root.new_term('Title', 'American Community Survey, 5 Year, 2009-2014')
root.new_term('Release', 5)
root.new_term('Year', 2014)
root.new_term('Include', 'acs20145-sources.csv')
)

# Deal with an ssl hostname-matching bug: wildcard certificates for
# *.s3.amazonaws.com don't match bucket names that contain dots, so
# collapse the dots in the bucket portion before matching.
# NOTE(review): monkey-patching ssl.match_hostname is process-global.
import ssl

_old_match_hostname = ssl.match_hostname

def _new_match_hostname(cert, hostname):
    # Only rewrite S3 hostnames; everything else passes through untouched.
    if hostname.endswith('.s3.amazonaws.com'):
        pos = hostname.find('.s3.amazonaws.com')
        hostname = hostname[:pos].replace('.', '') + hostname[pos:]
    return _old_match_hostname(cert, hostname)

ssl.match_hostname = _new_match_hostname

# presumably `ambry`, `release` and `year` are defined earlier in the
# file — not visible in this chunk; confirm before refactoring.
l = ambry.get_library()

b = l.bundle('census.gov-acs-p{}ye{}'.format(release, year))

# Break up the rows we are storing to reduce memory usage
sumlevel_groups = [
    [40, 50, 60, 160, 400, 950, 960, 970, 610, 620, 500],
    [140],  # Tracts
    [150]  # Blockgroups
]

def write_rows(sumlevel, table_name, p, rows):
    # Output path layout: <year>/<release>/<table>/<sumlevel>.csv
    # (function body appears to continue beyond this chunk)
    file_name = "{}/{}/{}/{}.csv".format(year, release, table_name, sumlevel)