def update_gp_addresses(): packages = dev.action.package_list() print 'Making sure Package exists' if 'gp-practice-addresses' not in packages: dev.action.package_create(name='gp-practice-addresses', title='Locations of GP Practices') with TMP: print 'Converting Excel Spreadsheet' dataconverters.dataconvert(GP_ADDRS, 'gp.addresses.csv') csvfile = TMP/'gp.addresses.csv' with csvfile.open('r') as upload: print 'Creating Resource' resource = dev.action.resource_create(package_id='gp-practice-addresses', upload=upload, format='csv', name=datetime.datetime.now().strftime('%d-%m-%y-%H-%S')) with csvfile.csv() as csv: print 'Preparing data for datastore' fieldnames = csv.next() fields = [{"id": n, "type": "text"} for n in fieldnames] records = [dict(zip(fieldnames, row)) for row in csv] print 'Uploading to datastore' dev.action.datastore_create( force=True, resource_id=resource['id'], fields=fields, records=records ) return 0
def extract():
    '''Extract data from cached raw data files in archive and write to data/
    '''
    source = [
        'archive/home-price-index-levels.xls',
        'archive/national-home-price-index-levels.xls'
    ]
    # all-month.csv might be more appropriate but we wanted to keep continuity
    # with cities-month.csv (before mid 2014 city data and national data were
    # provided separately but now there is just one file with everything)
    out_path = ['data/cities-month.csv', 'data/national-month.csv']
    # List (not tuple) to match source/out_path above.
    tmp_out = [
        os.path.join('tmp', 'home-price-index-levels.csv'),
        os.path.join('tmp', 'national-home-price-index-levels.csv')
    ]
    # Walk source/tmp/out triples in lockstep instead of indexing by range().
    for src, tmp, out in zip(source, tmp_out, out_path):
        dataconverters.dataconvert(src, tmp, guess_types=False)
        # Use context managers so file handles are closed deterministically
        # (the original open(...).read()/write() leaked them).
        with open(tmp) as infile:
            indata = infile.read()
        # fix time in dataconvert which adds 00:00:00
        indata = indata.replace(' 00:00:00', '')
        indata = indata.replace('column_1', 'Date')
        lines = indata.split('\n')
        # Drop the second line — presumably a spurious sub-header emitted by
        # dataconvert; behavior preserved from the original.
        del lines[1]
        with open(out, 'w') as outfile:
            outfile.write('\n'.join(lines))
def update_gp_addresses():
    """Ensure the 'gp-practice-addresses' CKAN package exists, convert the
    GP addresses spreadsheet to CSV, upload it as a resource, and load the
    rows into the datastore.

    Returns 0 on completion.
    """
    packages = dev.action.package_list()
    print 'Making sure Package exists'
    # Create the package only if this is the first run.
    if 'gp-practice-addresses' not in packages:
        dev.action.package_create(name='gp-practice-addresses',
                                  title='Locations of GP Practices')
    # TMP appears to be a path-like context manager (temporary working dir)
    # — defined elsewhere in the file.
    with TMP:
        print 'Converting Excel Spreadsheet'
        dataconverters.dataconvert(GP_ADDRS, 'gp.addresses.csv')
        csvfile = TMP / 'gp.addresses.csv'
        with csvfile.open('r') as upload:
            print 'Creating Resource'
            # NOTE(review): '%d-%m-%y-%H-%S' skips minutes (%M) — confirm
            # whether '%H-%M' was intended for the resource name.
            resource = dev.action.resource_create(
                package_id='gp-practice-addresses',
                upload=upload,
                format='csv',
                name=datetime.datetime.now().strftime('%d-%m-%y-%H-%S'))
        with csvfile.csv() as csv:
            print 'Preparing data for datastore'
            fieldnames = csv.next()  # header row: column names
            # All columns are uploaded as plain text.
            fields = [{"id": n, "type": "text"} for n in fieldnames]
            records = [dict(zip(fieldnames, row)) for row in csv]
            print 'Uploading to datastore'
            dev.action.datastore_create(force=True,
                                        resource_id=resource['id'],
                                        fields=fields,
                                        records=records)
    return 0
def extract():
    '''Extract data from cached raw data files in archive and write to data/
    '''
    source = 'archive/home-price-index-levels.xls'
    # all-month.csv might be more appropriate but we wanted to keep continuity
    # with cities-month.csv (before mid 2014 city data and national data were
    # provided separately but now there is just one file with everything)
    out_path = 'data/cities-month.csv'
    tmp_out = os.path.join('tmp', 'home-price-index-levels.csv')
    dataconverters.dataconvert(source, tmp_out, guess_types=False)
    # Context managers close the handles deterministically (the original
    # open(...).read()/write() leaked them).
    with open(tmp_out) as infile:
        indata = infile.read()
    # fix time in dataconvert which adds 00:00:00
    indata = indata.replace(' 00:00:00', '')
    indata = indata.replace('column_1', 'Date')
    lines = indata.split('\n')
    # Drop the second line — presumably a spurious sub-header emitted by
    # dataconvert; behavior preserved from the original.
    del lines[1]
    with open(out_path, 'w') as outfile:
        outfile.write('\n'.join(lines))
from dataconverters import dataconvert

# One-shot conversion: turn the raw Excel workbook into a CSV file.
SRC = 'nat_ground_contacts.xls'
DEST = 'outfile.csv'
dataconvert(SRC, DEST, format='xls')
def xls_to_csv():
    """Convert both source spreadsheets to their temporary CSV files."""
    # NOTE(review): only the cities conversion passes guess_types=False;
    # preserved as-is — confirm the asymmetry is intentional.
    jobs = (
        (xls_national, tmp_national, {}),
        (xls_cities, tmp_cities, {'guess_types': False}),
    )
    for src, dest, options in jobs:
        dataconverters.dataconvert(src, dest, **options)