Пример #1
0
if __name__ == "__main__":
    import sys
    # Passing "azure" as the first CLI argument enables blob upload of results.
    azure = len(sys.argv) > 1 and sys.argv[1] == "azure"
    blob_upload = None
    if azure:
        print("Upload to azure")
        from azureblob.upload_file_to_blob import BlobUploader
        blob_upload = BlobUploader(make_container_public=True)
    else:
        print("No azure upload requested. Pulling locally only.")

    # For each configured source: fetch the zip over FTP, fetch its field
    # mapping over HTTP, unzip, convert members to JSON, optionally upload.
    for key, files in sources.items():
        print("Downloading " + key)
        local_zip_file = "{0}/{1}.zip".format(tmp_data_folder, key)
        local_unzip_dir = "{0}/{1}".format(tmp_data_folder, key)
        pull_ftp_data(files['data'], local_zip_file)
        # The mapping resource is a comma-separated list of column names.
        mapping = pull_http_data(files['mapping']).strip()

        mapping = mapping.split(',')

        # Unzip the data; the with-block closes the archive when done
        # (the original leaked the ZipFile handle).
        with ZipFile(local_zip_file) as z:
            z.extractall(local_unzip_dir)
            for unzipfile in z.namelist():
                # NOTE(review): clean_file is keyed by `key` only, so multiple
                # archive members overwrite the same output — confirm intended.
                clean_file = "{0}/{1}.txt".format(clean_data_folder, key)
                to_json(mapping, "{0}/{1}".format(local_unzip_dir, unzipfile), clean_file)

                if blob_upload:
                    print("Uploading to azure blob.")
                    # Close the uploaded file handle (the original left it open).
                    with open(clean_file, 'r') as clean_fh:
                        blob_upload.put_json_file(clean_fh, "raw_mappings/{0}.json".format(key))
Пример #2
0
    pass

# Ensure the output directory exists; a pre-existing dir is fine.
try:
    os.mkdir(clean_data_folder)
except OSError:
    # Can't recreate same dir. OSError is cross-platform: the original
    # WindowsError is only defined on Windows in Python 3, so catching it
    # on other platforms raises NameError. On Windows, WindowsError is an
    # alias of OSError, so behavior there is unchanged.
    pass


# perform pull: one archive per day over [mindate, maxdate], inclusive.
num_days_to_pull = (maxdate - mindate).days
for i in range(num_days_to_pull + 1):
    date_str = datetime.strftime(mindate + timedelta(days=i), "%Y%m%d")
    print("Parsing " + date_str)
    ziplocation, unziplocation = pull_data(date_str, tmp_data_folder)

    # unzip; the with-block closes the archive each iteration
    # (the original leaked the ZipFile handle).
    with ZipFile(ziplocation) as z:
        z.extractall(unziplocation)

        local_store_unzipped_folder(z)

# Push the accumulated per-date results to blob storage, when enabled.
if blob_upload:
    # Main filing data: one JSON blob per date.
    for upload_date, handle in filehandles.items():
        print("Uploading main data for {0}".format(upload_date))
        blob_upload.put_json_file(handle, "raw_filings/{0}.json".format(upload_date))

    print("Uploading headers")
    # Organization header definitions: one JSON blob per date.
    for upload_date, handle in org_defs.items():
        blob_upload.put_json_file(handle, "raw_headers/{0}.json".format(upload_date))
Пример #3
0
"""Build a zip-code (ZCTA) to congressional-district mapping CSV.

Joins the Census ZCTA->district relationship file with the state/district
code file, writes data/zip_to_district.csv, and optionally uploads it to
Azure blob storage when "azure" is passed as the first CLI argument.
"""
import sys  # bug fix: sys was used below but never imported

import pandas as pd

zipcodes = 'http://www2.census.gov/geo/relfiles/cdsld13/natl/natl_zccd_delim.txt'
districts = 'http://www2.census.gov/geo/docs/reference/codes/files/national_cd113.txt'

# Get data
df_zip = pd.read_csv(zipcodes, skiprows=1)
df_zip.columns = ['State', 'ZCTA', 'District']
# Raw string for the regex delimiter (columns separated by 2+ spaces);
# '\s\s+' without r'' is an invalid escape sequence in modern Python.
df_districts = pd.read_table(districts, delimiter=r'\s\s+', header=None, skiprows=1, engine='python')
df_districts.columns = ['STATE', 'STATEKEY', 'DISTRICT', 'DISTRICTNAME']

# get just state bits from districts: one STATEKEY per STATE code.
states = df_districts.groupby('STATE').agg({'STATEKEY': max}).reset_index()

# Attach the state name to each zip row, then keep the first four columns.
df_zip_withstates = df_zip.merge(states, left_on='State', right_on='STATEKEY', how='left')
# DataFrame.icol() was removed from pandas; .iloc is the positional indexer.
df_zip_withstates = df_zip_withstates.iloc[:, range(4)]
df_zip_withstates.columns = ['state_code', "zcta", "district", "state"]
df_zip_withstates.to_csv("data/zip_to_district.csv", index=False)

azure = len(sys.argv) > 1 and sys.argv[1] == 'azure'

if azure:
    print("Uploading to azure")
    from azureblob.upload_file_to_blob import BlobUploader
    blob_upload = BlobUploader(make_container_public=True)

    # Close the uploaded file handle (the original left it open).
    with open("data/zip_to_district.csv", 'r') as csv_fh:
        blob_upload.put_json_file(csv_fh, "raw_mappings/zip_to_district.csv")

Пример #4
0
"""
Download the zcta to county file (many to many relationship)
"""

import requests
import sys

use_azure = len(sys.argv) > 1 and sys.argv[1] == 'azure'

file_loc = "http://www2.census.gov/geo/docs/maps-data/data/rel/zcta_county_rel_10.txt"

# Get file locally
r = requests.get(file_loc)

local_file = open("data/zcta_to_county.csv", 'w')
local_file.write(r.text)
local_file.close()

if use_azure:
    print("Uploading to azure")
    from azureblob.upload_file_to_blob import BlobUploader
    blob_upload = BlobUploader(make_container_public=True)

    blob_upload.put_json_file(open("data/zcta_to_county.csv", 'r'), "raw_mappings/zcta_to_county.csv")