Example #1
import re

import settings

def msa_iter():
    # List1.txt pairs a five-digit CBSA code with an area title ending in
    # "Metropolitan Statistical Area" or "Micropolitan Statistical Area".
    msa_re = re.compile(r"^(\d{5})\s{19}(.*) (Metro|Micro)politan Statistical Area$")
    txt_path = settings.dataset_path('locations', filename='List1.txt')
    for line in (l.strip() for l in open(txt_path)):
        m = msa_re.match(line)
        if m is not None:
            # HEADERS is a module-level tuple of output field names.
            yield dict(zip(HEADERS, m.groups()[:2]))
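A quick way to exercise the generator; HEADERS is defined elsewhere in the module, assumed here to name the two captured groups:

for msa in msa_iter():
    # e.g. {'code': '16980', 'name': 'Chicago-Naperville-Joliet, IL-IN-WI'}
    print msa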
Example #2
import csv

import settings
from saucebrush.filters import Filter  # base class assumed

class CensusTestFilter(Filter):
    def __init__(self):
        super(CensusTestFilter, self).__init__()
        # Collect every MSA code in the census file for membership tests.
        self._msas = []
        csv_path = settings.dataset_path('default', filename='census.csv')
        for record in csv.DictReader(open(csv_path)):
            self._msas.append(record['msa'])
        self._msas = set(self._msas)
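    # Only the constructor is shown in the original excerpt. Under
    # saucebrush's filter protocol the class would also define
    # process_record; a sketch, assuming records without a known MSA
    # code should be dropped:
    def process_record(self, record):
        if record['msa'] in self._msas:
            return record
        return None  # returning None discards the record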
Example #3
import settings
from saucebrush import run_recipe
from saucebrush.sources import CSVSource
from saucebrush.filters import FieldKeeper, FieldModifier, FieldMerger
from saucebrush.stats import Histogram  # saucebrush import paths assumed

def load_zipcodes():

    headers = (
        'country_code','postal_code','name',
        'state_name','state_code',
        'county_name','county_code',
        'community_name','community_code',
        'latitude','longitude','accuracy'
    )
    
    # Tally records per two-letter state code as the recipe runs.
    state_histogram = Histogram('state_code')
    state_histogram.label_length = 2
    
    csv_path = settings.dataset_path('default', filename='zipcodes.txt')
    
    run_recipe(
        CSVSource(open(csv_path), delimiter="\t", fieldnames=headers),
        FieldKeeper(('postal_code','name','state_code','latitude','longitude')),
        FieldModifier(('latitude','longitude'), float),
        FieldMerger({'latlng': ('latitude', 'longitude')}, lambda lat, lng: (lat, lng)),
        #MongoZipEmitter(),
        #DebugEmitter(),
        state_histogram,
    )
    
    return str(state_histogram)
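For reference, the FieldMerger step folds the coordinate pair into a single latlng field and, since keep_fields is not set, drops the source fields; a record moves through roughly like this (illustrative values):

# before: {'postal_code': '60601', 'name': 'Chicago', 'state_code': 'IL',
#          'latitude': 41.88, 'longitude': -87.62}
# after:  {'postal_code': '60601', 'name': 'Chicago', 'state_code': 'IL',
#          'latlng': (41.88, -87.62)}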
Example #4
import MySQLdb

import settings
from saucebrush import run_recipe
from saucebrush import sources, emitters
# ValidOccupationFilter, CategoryIDFilter, CategoryEmitter and
# OccupationEmitter are project-local.

def load_locations():

    conn = MySQLdb.connect(
        user=settings.MYSQL_USER,
        passwd=settings.MYSQL_PASS,
        db=settings.MYSQL_DATABASE,
        host=settings.MYSQL_HOST,
        port=settings.MYSQL_PORT,
    )

    cursor = conn.cursor()
    # Clear any previously loaded rows before re-importing.
    cursor.execute("""DELETE FROM occupation_category""")
    cursor.execute("""DELETE FROM occupation""")
    cursor.close()

    path = settings.dataset_path(None, filename='occupations.csv')

    run_recipe(
        sources.CSVSource(open(path)),
        ValidOccupationFilter(),
        CategoryIDFilter(),
        CategoryEmitter(conn),
        OccupationEmitter(conn),
        #emitters.DebugEmitter(),
        error_stream=emitters.DebugEmitter(),
    )

    conn.commit()
    conn.close()
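The filters and emitters in this recipe are project-local. For orientation, a record-validating saucebrush filter generally follows this shape (a hypothetical sketch, not the project's actual ValidOccupationFilter):

from saucebrush.filters import Filter

class ValidOccupationFilterSketch(Filter):
    """Hypothetical: drop rows that lack an occupation title."""
    def process_record(self, record):
        if record.get('title'):  # 'title' is an assumed column name
            return record
        return None  # None discards the record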
Example #5
import os

import settings
from saucebrush import run_recipe
from saucebrush import sources, filters, emitters
# MongoNACCRRAEmitter is project-local.

def load_naccrra():

    csv_path = os.path.join(settings.dataset_path('default'), 'childcarecosts.csv')
    
    run_recipe(
        sources.CSVSource(open(csv_path)),
        filters.FieldRenamer({
            'state': 'State',
            'family_infant': 'Family-Infant',
            'family_4': 'Family-4-Year-Old',
            'family_school': 'Family-School-Age',
            'center_infant': 'Center-Infant',
            'center_4': 'Center-4-Year-Old',
            'center_school': 'Center-School-Age',
        }),
        MongoNACCRRAEmitter(),
        emitters.CountEmitter(),
        #emitters.DebugEmitter(),
        error_stream=emitters.DebugEmitter(),
    )
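The FieldRenamer mapping reads {new_field: source_column}, so each CSV row is reshaped before it reaches the emitters; roughly (illustrative values):

# before: {'State': 'IL', 'Family-Infant': '11034', ...}
# after:  {'state': 'IL', 'family_infant': '11034', ...}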
Example #6
import sqlite3

import settings
from saucebrush import run_recipe
from saucebrush.sources import SqliteSource
from saucebrush.filters import FieldMerger, FieldRemover
from saucebrush.emitters import Emitter, CSVEmitter, DebugEmitter

class LocationUpdateEmitter(Emitter):  # Emitter base assumed from saucebrush

    def __init__(self, db_path):
        super(LocationUpdateEmitter, self).__init__()
        self._conn = sqlite3.connect(db_path)

    def emit_record(self, record):
        # Write the geocoded coordinates back onto the matching row.
        stmt = """UPDATE locations SET latitude = ?, longitude = ? WHERE city = ? AND state = ? AND zipcode = ?"""
        params = (
            record['latitude'],
            record['longitude'],
            record['city'],
            record['state'],
            record['zipcode'],
        )
        self._conn.execute(stmt, params)
        self._conn.commit()

    def done(self):
        self._conn.close()

db_path = settings.dataset_path('default', filename='load_nces.db')
csv_path = settings.dataset_path('nces', filename='school_locations.csv')

# GeocoderFilter is project-local; its first argument is the geocoding
# service's application id.
run_recipe(
    SqliteSource(db_path, """SELECT * FROM locations WHERE latitude = '' AND longitude = ''"""),
    FieldMerger({'address': ('city', 'state', 'zipcode')}, lambda c, s, z: "%s, %s %s" % (c, s, z), keep_fields=True),
    GeocoderFilter('Kv3.btLV34EuebZGMzi1KaqI_BOPhPjx7FtbvED.umr8DGUq0NysoGN0XIIIDRU-', 'address'),
    FieldRemover('address'),
    CSVEmitter(open(csv_path, 'w'), fieldnames=('city', 'state', 'zipcode', 'latitude', 'longitude')),
    DebugEmitter(),
)
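With the geocoded rows written to school_locations.csv, a second pass could push them back into the sqlite table through the emitter defined above; a sketch, assuming CSVEmitter wrote a header row that CSVSource can read back:

from saucebrush.sources import CSVSource
from saucebrush.filters import FieldModifier

run_recipe(
    CSVSource(open(csv_path)),                        # re-read the geocoded CSV
    FieldModifier(('latitude', 'longitude'), float),  # CSV values come back as strings
    LocationUpdateEmitter(db_path),
)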

Example #7
import csv

import MySQLdb
import pymongo

import settings

def load_rpp():

    local = {}
    state = {}

    # Per-area price figures keyed by area code.
    with open(settings.dataset_path('rpp', 'A5.coded.csv')) as infile:
        reader = csv.DictReader(infile)
        for record in reader:
            if record['code']:
                local[record['code']] = {
                    'overall': record['overall'],
                    'goods': record['goods'],
                    'services': record['services'],
                }

    # Statewide figures keyed by state abbreviation, minus the key columns.
    with open(settings.dataset_path('rpp', 'A7.csv')) as infile:
        reader = csv.DictReader(infile)
        for record in reader:
            if record['state_abbreviation']:
                state[record['state_abbreviation']] = record.copy()
                del state[record['state_abbreviation']]['state']
                del state[record['state_abbreviation']]['state_abbreviation']

    locations = pymongo.Connection()['k2']['locations']

    db = MySQLdb.connect(
        user=settings.MYSQL_USER,
        passwd=settings.MYSQL_PASS,
        db=settings.MYSQL_DATABASE,
        host=settings.MYSQL_HOST,
    )

    cursor = db.cursor()

    for loc in locations.find({'good_to_go': True}):

        # Merge the local and statewide figures for this location.
        record = {}

        if loc['code'] in local:
            record.update(local[loc['code']])

        if loc['primary_state'] in state:
            record.update(state[loc['primary_state']])

        # All figures are numeric strings in the source files.
        for key in record.iterkeys():
            record[key] = float(record[key])

        fields = ['code']
        values = [loc['code']]

        for key, value in record.iteritems():
            fields.append(key)
            values.append(value)

        stmt = "INSERT INTO rpp (%s) VALUES (%s)" % (",".join(fields), ",".join('%s' for v in values))

        print loc['code']  # progress output

        cursor.execute(stmt, values)

    cursor.close()
    db.commit()
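Only the column list is interpolated into the statement; the values travel as query parameters. For a location with two matched fields the statement comes out as (illustrative):

# fields -> ['code', 'overall', 'goods']
# stmt   -> "INSERT INTO rpp (code,overall,goods) VALUES (%s,%s,%s)"
# cursor.execute(stmt, ['16980', 101.3, 99.8])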
Example #8
import csv

import settings

paths = (
    "cu.data.3.AsizeNorthEast",
    "cu.data.4.AsizeNorthCentral",
    "cu.data.5.AsizeSouth",
    "cu.data.6.AsizeWest",
    "cu.data.7.OtherNorthEast",
    "cu.data.8.OtherNorthCentral",
    "cu.data.9.OtherSouth",
    "cu.data.10.OtherWest",
)

items = []

for filename in paths:

    path = settings.dataset_path('cpi', filename)

    with open(path) as infile:

        for row in csv.DictReader(infile, delimiter='\t'):

            row = {k: v.strip() for (k, v) in row.iteritems()}

            sid = row['series_id']

            # Keep only CPI-U series.
            if not sid.startswith('CUU'):
                continue

            # The series id also encodes periodicity, area, and item codes.
            row['periodicity'] = sid[3]
            row['area_code'] = sid[4:8]
            row['item_code'] = sid[8:]

            items.append(row)
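The slicing above follows the BLS CPI series-id layout; decoding the canonical all-items CPI-U id shows the pieces:

sid = "CUUR0000SA0"  # CPI-U, monthly, U.S. city average, all items
assert sid.startswith('CUU')
print sid[3]    # 'R'    -> periodicity (monthly)
print sid[4:8]  # '0000' -> area code (U.S. city average)
print sid[8:]   # 'SA0'  -> item code (all items)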
Example #9
import csv
import sys

from pymongo import Connection
import settings

PATH = settings.dataset_path('aff', 'DEC_10_DP_DPDP1_with_ann.csv')

def list_columns():

    with open(PATH) as infile:

        reader = csv.reader(infile)

        # The first nine rows of the American FactFinder export hold the
        # layered column headers; transpose them so each tuple describes
        # a single column.
        rows = [reader.next() for _ in xrange(9)]
        columns = zip(*rows)

        writer = csv.writer(sys.stdout)
        writer.writerow(('column', 'S', 'category', 'title', 'HD', 'type'))

        for i, col in enumerate(columns):

            writer.writerow((
                i + 1,
                col[0],
                col[1],
                " ".join(col[2:7]),
                col[7],
                col[8],
            ))

def load_data():