def geocoding(switch, dataset, modern, historical):
    """Geocode *dataset* against the 'modern' or 'historical' classification.

    switch     -- 'modern' or 'historical'; selects which coder/index to use.
    dataset    -- source dataset handed to loadgeocoder().
    modern     -- modern geocoder dataframe (used when switch == 'modern').
    historical -- historical geocoder dataframe (used otherwise).

    Returns (maindata, metadata, coder) where metadata maps the module-level
    global `handle` to a {'title', 'units'} dict.

    NOTE(review): this function reads the globals `title`, `units`, `config`,
    `handle`, `geolist` and `oecd2webmapper` — it only works when called from
    a script that has set them (see the driver loop elsewhere in this file).
    """
    #if config: switch = 'historical'
    metadata = {}
    metadataitem = {}
    # `title`/`units` come from module scope (set by compiledataset upstream —
    # TODO confirm); empty title is simply omitted.
    if title:
        metadataitem['title'] = title
    metadataitem['units'] = units
    if switch == 'modern':
        activeindex = modern.index
        coder = modern
        class1 = switch
    else:
        activeindex = historical.index
        coder = historical
    # Split the dataset into its modern- and historical-coded parts.
    (moderndata, historicaldata) = loadgeocoder(config, dataset, '')
    if switch == 'modern':
        maindata = moderndata
    else:
        # Do conversion to webmapper system
        # NOTE(review): `if not historicaldata` on a DataFrame raises
        # ValueError in modern pandas; presumably loadgeocoder may return
        # None/empty here — confirm against its implementation.
        if not historicaldata:
            maindata = moderndata
            webmapperindex = []
            # Remap each OECD code to the webmapper code; unknown or
            # non-numeric codes become -1 and are dropped just below.
            for code in maindata.index:
                try:
                    webmappercode = oecd2webmapper[int(code)]
                except:
                    webmappercode = -1
                webmapperindex.append(webmappercode)
            maindata.index = webmapperindex
            # Keep only recognized (positive) webmapper codes.
            maindata = maindata[maindata.index > 0]
        else:
            maindata = historicaldata
    # convert_objects is long deprecated (removed in pandas >= 0.19);
    # kept as-is because the file targets a legacy pandas.
    maindata = maindata.convert_objects(convert_numeric=True)
    # selectint() splits labels into numeric vs. non-numeric groups.
    (cfilter, notint) = selectint(maindata.columns)
    codes = selectint(maindata.index)
    geo = load_geocodes(config, switch, codes, maindata, geolist)
    #for colname in notint:
    #    maindata = maindata.drop(colname, axis=1)
    # Drop num if in dataframe
    if '1' in maindata.columns:
        maindata = maindata.drop('1', axis=1)
    #dataframe[pid] = maindata
    # Keyed by the global `handle` so callers can look metadata up per file.
    metadata[handle] = metadataitem
    return (maindata, metadata, coder)
def dataframe_compiler(config, fullpath, handle, switch, datafilter):
    """Compile a country/year dataframe for *handle* and write it to Excel.

    config     -- project configuration dict (needs 'geocoderhandle'; the
                  'emptyvalues' key is set here when fullpath is given).
    fullpath   -- output directory; rewritten to the full .xlsx path.
    handle     -- dataset handle to read via content2dataframe().
    switch     -- 'modern' or 'historical' coding system.
    datafilter -- dict with 'startyear', 'endyear', 'ctrlist' filters; a
                  falsy value skips filtering entirely.

    Returns (filetitle, fullpath, finalsubset).
    """
    handles = []
    ctrlist = []
    metadata = {}
    #switch = 'historical'
    # Load Geocoder
    (classification, dataset, title, units) = content2dataframe(config, config['geocoderhandle'])
    (geocoder, geolist, oecd2webmapper) = buildgeocoder(dataset, config, '')
    (modern, historical) = loadgeocoder(config, dataset, 'geocoder')
    # Default years selection (overridden below when a start year is given).
    coderyears = list(range(1500, 2016))
    # Years selected
    if datafilter['startyear']:
        coderyears = list(range(int(datafilter['startyear']), int(datafilter['endyear'])))
    # Reading dataset
    (class1, dataset, title, units) = content2dataframe(config, handle)
    # Build a filesystem-safe file name from the dataset title.
    filetitle = title
    filetitle = re.sub(' ', '_', filetitle)
    filetitle = re.sub(r'[\(\)]', '_', filetitle)
    if filetitle:
        fullpath = "%s/%s_%s.xlsx" % (fullpath, filetitle, switch)
    #return ('test', 'test')
    if switch == 'modern':
        activeindex = modern.index
        coder = modern
        class1 = switch
    else:
        activeindex = historical.index
        coder = historical
    (moderndata, historicaldata) = loadgeocoder(config, dataset, '')
    if switch == 'modern':
        maindata = moderndata
    else:
        # Do conversion to webmapper system if there are no historical data
        isdata = ''
        try:
            if not historicaldata.empty:
                isdata = 'yes'
        except:
            # historicaldata is not a DataFrame (None) -> no historical data.
            isdata = 'no'
        if isdata == 'no':
            maindata = moderndata
            webmapperindex = []
            # Remap OECD codes to webmapper codes; unknowns become -1.
            for code in maindata.index:
                try:
                    webmappercode = oecd2webmapper[int(code)]
                except:
                    webmappercode = -1
                webmapperindex.append(webmappercode)
            maindata.index = webmapperindex
            # Drop not recognized locations
            maindata = maindata[maindata.index > 0]
        else:
            maindata = historicaldata
    # Always provide both metadata keys, even when empty.
    metadata['title'] = title if title else ''
    metadata['units'] = units if units else ''
    (cfilter, notint) = selectint(maindata.columns)
    codes = selectint(maindata.index)
    geo = load_geocodes(config, switch, codes, maindata, geolist)
    # Drop non-numeric (non-year) columns.
    for colname in notint:
        maindata = maindata.drop(colname, axis=1)
    # Apply filters
    if datafilter:
        (datasubset, ctrlist) = datasetfilter(maindata, datafilter)
    else:
        # BUGFIX: was `datasetset = maindata` (typo), which left `datasubset`
        # undefined and crashed dataset_analyzer() below with a NameError.
        datasubset = maindata
    # Create Excel file out from dataframe
    (yearscolumns, notyears) = selectint(maindata.columns)
    (countryinfo, notcountry) = selectint(maindata.index)
    (finalsubset, icoder, isyear, ctrfilter, nodata) = dataset_analyzer(datasubset, coder, yearscolumns)
    #return (finalsubset, fullpath, finalsubset)
    # Apply filter to countries
    if datafilter['ctrlist']:
        tmpcoder = icoder.ix[ctrlist]
        icoder = pd.DataFrame(tmpcoder)
    #return (finalsubset.to_html(), 'test')
    if fullpath:
        config['emptyvalues'] = 'no'
    if config['emptyvalues'] == 'no':
        # Restrict the exported years to those actually present.
        (coderyears, notyears) = selectint(finalsubset.columns)
    datafile = create_excel_dataset(fullpath, icoder, metadata, icoder.columns, coderyears, finalsubset, isyear, ctrfilter)
    return (filetitle, fullpath, finalsubset)
from datasets import loaddataset, countrystats, loaddataset_fromurl, loadgeocoder, treemap
from sys import argv
import pandas as pd
import numpy as np

# Driver script: load the geocoder dataset and one data handle, then render
# a treemap. NOTE(review): `configuration` is not imported here — presumably
# provided by an omitted import or the enclosing module; confirm.
handles = []
remote = 'on'
handle = "hdl:10622/LIKXCZ"
config = configuration()
# 'remote' is always truthy here, so the remote branch is always taken.
if remote:
    (classification, dataset) = loaddataset_fromurl(config['apiroot'], config['geocoderhandle'])
else:
    dataset = loaddataset(handles)
(modern, historical) = loadgeocoder(dataset)
handles = []
handle = 'hdl:10622/DIUBXI'
#handle = 'hdl:10622/GZ7O1K'
handles.append(handle)
if remote:
    (class1, dataset) = loaddataset_fromurl(config['apiroot'], handle)
else:
    dataset = loaddataset(handles)
#modern.ix[76]['country']
#historical.ix[1]
# Python 2 print statement — this file targets Python 2.
print class1
#print dataset.to_html()
res = treemap(dataset)
# Bootstrap script: build the geocoder and pick a coder + dataset handle.
# NOTE(review): `configuration`, `content2dataframe`, `buildgeocoder` and
# `loadgeocoder` are not imported in this chunk — provided elsewhere; confirm.
handles = []
config = configuration()
config['remote'] = ''
# Geocoder
handle = config['geocoderhandle']
(classification, dataset, title, units) = content2dataframe(config, config['geocoderhandle'])
#if remote:
#    (classification, dataset) = loaddataset_fromurl(config, handle)
#else:
#    dataset = loaddataset(handles)
(geocoder, geolist, oecdlist) = buildgeocoder(dataset, config, '')
#print geocoder
(modern, historical) = loadgeocoder(config, dataset, 'geocoder')
# The second assignment wins: the script runs with switch == 'modern'.
switch = 'historical'
switch = 'modern'
if switch == 'modern':
    activeindex = modern.index
    coder = modern
    class1 = switch
else:
    activeindex = historical.index
    coder = historical
# Last assignment wins: hdl:10622/GZ7O1K is the handle actually used.
handle = "hdl:10622/DIUBXI"
handle = "hdl:10622/WNGZ4A"
handle = "hdl:10622/GZ7O1K"
def dataframe_compiler(config, fullpath, handle, switch, datafilter):
    """Compile a country/year dataframe for *handle* and write it to Excel.

    config     -- project configuration dict (needs "geocoderhandle"; the
                  "emptyvalues" key is set here when fullpath is given).
    fullpath   -- output directory; rewritten to the full .xlsx path.
    handle     -- dataset handle to read via content2dataframe().
    switch     -- "modern" or "historical" coding system.
    datafilter -- dict with "startyear", "endyear", "ctrlist" filters; a
                  falsy value skips filtering entirely.

    Returns (filetitle, fullpath, finalsubset).
    """
    handles = []
    ctrlist = []
    metadata = {}
    # switch = 'historical'
    # Load Geocoder
    (classification, dataset, title, units) = content2dataframe(config, config["geocoderhandle"])
    (geocoder, geolist, oecd2webmapper) = buildgeocoder(dataset, config, "")
    (modern, historical) = loadgeocoder(config, dataset, "geocoder")
    # Default years selection (overridden below when a start year is given).
    coderyears = list(range(1500, 2016))
    # Years selected
    if datafilter["startyear"]:
        coderyears = list(range(int(datafilter["startyear"]), int(datafilter["endyear"])))
    # Reading dataset
    (class1, dataset, title, units) = content2dataframe(config, handle)
    # Build a filesystem-safe file name from the dataset title.
    filetitle = title
    filetitle = re.sub(" ", "_", filetitle)
    filetitle = re.sub(r"[\(\)]", "_", filetitle)
    if filetitle:
        fullpath = "%s/%s_%s.xlsx" % (fullpath, filetitle, switch)
    # return ('test', 'test')
    if switch == "modern":
        activeindex = modern.index
        coder = modern
        class1 = switch
    else:
        activeindex = historical.index
        coder = historical
    (moderndata, historicaldata) = loadgeocoder(config, dataset, "")
    if switch == "modern":
        maindata = moderndata
    else:
        # Do conversion to webmapper system if there are no historical data
        isdata = ""
        try:
            if not historicaldata.empty:
                isdata = "yes"
        except:
            # historicaldata is not a DataFrame (None) -> no historical data.
            isdata = "no"
        if isdata == "no":
            maindata = moderndata
            webmapperindex = []
            # Remap OECD codes to webmapper codes; unknowns become -1.
            for code in maindata.index:
                try:
                    webmappercode = oecd2webmapper[int(code)]
                except:
                    webmappercode = -1
                webmapperindex.append(webmappercode)
            maindata.index = webmapperindex
            # Drop not recognized locations
            maindata = maindata[maindata.index > 0]
        else:
            maindata = historicaldata
    # Always provide both metadata keys, even when empty.
    metadata["title"] = title if title else ""
    metadata["units"] = units if units else ""
    (cfilter, notint) = selectint(maindata.columns)
    codes = selectint(maindata.index)
    geo = load_geocodes(config, switch, codes, maindata, geolist)
    # Drop non-numeric (non-year) columns.
    for colname in notint:
        maindata = maindata.drop(colname, axis=1)
    # Apply filters
    if datafilter:
        (datasubset, ctrlist) = datasetfilter(maindata, datafilter)
    else:
        # BUGFIX: was `datasetset = maindata` (typo), which left `datasubset`
        # undefined and crashed dataset_analyzer() below with a NameError.
        datasubset = maindata
    # Create Excel file out from dataframe
    (yearscolumns, notyears) = selectint(maindata.columns)
    (countryinfo, notcountry) = selectint(maindata.index)
    (finalsubset, icoder, isyear, ctrfilter, nodata) = dataset_analyzer(datasubset, coder, yearscolumns)
    # return (finalsubset, fullpath, finalsubset)
    # Apply filter to countries
    if datafilter["ctrlist"]:
        tmpcoder = icoder.ix[ctrlist]
        icoder = pd.DataFrame(tmpcoder)
    # return (finalsubset.to_html(), 'test')
    if fullpath:
        config["emptyvalues"] = "no"
    if config["emptyvalues"] == "no":
        # Restrict the exported years to those actually present.
        (coderyears, notyears) = selectint(finalsubset.columns)
    datafile = create_excel_dataset(
        fullpath, icoder, metadata, icoder.columns, coderyears, finalsubset, isyear, ctrfilter
    )
    return (filetitle, fullpath, finalsubset)
if datafilter['ctrlist']: tmpcoder = icoder.ix[ctrlist] icoder = pd.DataFrame(tmpcoder) datafile = create_excel_dataset(fullpath, icoder, metadata[handle], icoder.columns, coderyears, finalsubset, isyear, ctrfilter) b = datetime.now() d = b - a print "Time: " + str(d.seconds) + " seconds" return datafile for ffile in onlyfiles: pid = 'clio' fullpath = mypath + '/' + ffile isexcel = re.match('(.+)\.xls', ffile) if isexcel: newfile = isexcel.group(1) + "-historical.xlsx" fulloutfile = outdir + "/" + newfile print fulloutfile pid = newfile handle = pid print handle (jsonfile, csvfile, tmptitle, tmpunits) = dataextractor(fullpath, path, pid, fileID) (dataset, title, units) = compiledataset(csvfile) switch = 'historical' (maindata, metadata, coder) = geocoding(switch, dataset, modern, historical) (moderndata, historicaldata) = loadgeocoder(config, maindata, '') maindata = conversion(maindata, moderndata, historicaldata) print metadata[handle]['title'] outfile = store_dataset(fulloutfile, maindata, metadata, coder) print outfile
from sys import argv
import pandas as pd
import numpy as np

# Driver script variant: load the geocoder and one data handle, then render
# a treemap. NOTE(review): `configuration`, `loaddataset_fromurl`,
# `loaddataset`, `loadgeocoder` and `treemap` are not imported in this
# chunk — presumably supplied by an omitted import block; confirm.
handles = []
remote = 'on'
handle = "hdl:10622/LIKXCZ"
config = configuration()
# 'remote' is always truthy here, so the remote branch is always taken.
if remote:
    (classification, dataset) = loaddataset_fromurl(config['apiroot'], config['geocoderhandle'])
else:
    dataset = loaddataset(handles)
(modern, historical) = loadgeocoder(dataset)
handles = []
handle = 'hdl:10622/DIUBXI'
#handle = 'hdl:10622/GZ7O1K'
handles.append(handle)
if remote:
    (class1, dataset) = loaddataset_fromurl(config['apiroot'], handle)
else:
    dataset = loaddataset(handles)
#modern.ix[76]['country']
#historical.ix[1]
# Python 2 print statement — this file targets Python 2.
print class1
#print dataset.to_html()
res = treemap(dataset)
icoder.columns, coderyears, finalsubset, isyear, ctrfilter) b = datetime.now() d = b - a print "Time: " + str(d.seconds) + " seconds" return datafile for ffile in onlyfiles: pid = 'clio' fullpath = mypath + '/' + ffile isexcel = re.match('(.+)\.xls', ffile) if isexcel: newfile = isexcel.group(1) + "-historical.xlsx" fulloutfile = outdir + "/" + newfile print fulloutfile pid = newfile handle = pid print handle (jsonfile, csvfile, tmptitle, tmpunits) = dataextractor(fullpath, path, pid, fileID) (dataset, title, units) = compiledataset(csvfile) switch = 'historical' (maindata, metadata, coder) = geocoding(switch, dataset, modern, historical) (moderndata, historicaldata) = loadgeocoder(config, maindata, '') maindata = conversion(maindata, moderndata, historicaldata) print metadata[handle]['title'] outfile = store_dataset(fulloutfile, maindata, metadata, coder) print outfile
from openpyxl.cell import get_column_letter import numpy as np handles = [] remote = 'on' config = configuration() # Geocoder handle = config['geocoderhandle'] if remote: (classification, dataset, title, units) = loaddataset_fromurl(config, handle) else: dataset = loaddataset(handles) (geocoder, geolist, oecd) = buildgeocoder(dataset, config, '') (modern, historical) = loadgeocoder(config, dataset, 'geocoder') coderyears = [] for i in range(1500, 2015): coderyears.append(i) # Reading dataset handle = "hdl:10622/WNGZ4A" #handle = "hdl:10622/DIUBXI" handles = [] handles.append(handle) switch = 'historical' #switch = 'modern' if remote: (class1, dataset, title, units) = loaddataset_fromurl(config, handle) else: