def tableapis(handle, customcountrycodes, fromyear, toyear, customyear, logflag):
    # years in filter
    config = {}
    indicator = ''
    config = configuration()
    DEBUG = 0

    try:
        (dataset, revid, cliopid, clearpid) = findpid(handle)
    except:
        dataset = handle

    try:
        apifile = str(dataset) + ".json"
        jsonapi = config['apiroot'] + "/collabs/static/data/" + apifile
        dataframe = load_api_data(jsonapi, '')
    except:
        jsonapi = config['apiroot'] + "/api/datasets?handle=Panel[" + handle + "]"
        datajson = load_api_data(jsonapi, '')
        for handledata in datajson:
            dataframe = handledata['data']

    # DEBUG2
    #print dataframe
    loccodes = loadcodes(dataframe)
    (ctr, header) = countryset(customcountrycodes, loccodes)
    (frame, years, values, dates, original) = createframe(
        indicator, loccodes, dataframe, customyear, fromyear, toyear, ctr,
        logflag, DEBUG)
    names = ['indicator', 'm', 'ctrcode', 'country', 'year', 'intcode', 'value', 'id']
    (csvdata, aggrdata) = combinedata(ctr, frame, loccodes)
    return (years, frame, csvdata, aggrdata, original)
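# Sketch (not part of the original module): a typical call to tableapis() as a
# table or chart view might make it. The handle below is a placeholder; an
# empty customcountrycodes string selects all countries and logflag=0 keeps
# values on a linear scale.
def example_tableapis_call():
    (years, frame, csvdata, aggrdata, original) = tableapis(
        'hdl:12345/ABCDE',  # placeholder dataset handle
        '',                 # customcountrycodes: '' = all countries
        1500, 2012,         # fromyear, toyear
        '',                 # customyear: no single-year filter
        0)                  # logflag: 0 = linear scale
    return csvdata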
def totalstatistics(settings=''):
    config = configuration()
    if config['error']:
        return config['error']

    handles = []
    if request.args.get('handle'):
        handledataset = request.args.get('handle')
        (dataset, revid, cliopid, clearpid) = findpid(handledataset)
        handles.append(dataset)
    if request.args.get('dataset'):
        dataset = request.args.get('dataset')
        handles.append(dataset)
    if request.args.get('yearmin'):
        fromyear = request.args.get('yearmin')
    if request.args.get('yearmax'):
        toyear = request.args.get('yearmax')

    html = ''
    for dataset in handles:
        jsonapi = config['apiroot'] + "/collabs/static/data/" + str(dataset) + ".json"
        data = createdata(jsonapi)
        d = data.describe()
        show = d.transpose()
        stats = show.to_html()
        html = html + stats + '<br>'  # accumulate one summary table per dataset
    return html
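# Minimal sketch (assumption: illustrates the pandas chain used by
# totalstatistics(), not code from the original module). describe() computes
# count/mean/std/min/quartiles/max per column, transpose() puts one row per
# year column, and to_html() renders the summary as an HTML table.
def example_describe_to_html():
    import pandas as pd
    frame = pd.DataFrame({'1900': [1.0, 2.0, 3.0], '1901': [2.5, 3.5, 4.5]})
    summary = frame.describe().transpose()
    return summary.to_html()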
def metadata(dataset):
    #return ('xxx', '', '')
    config = configuration()
    (pid, fileid, revid, clearpid) = findpid(dataset)
    #return ('xxx', '', '')
    data = {}
    if pid:
        query = pid
        apiurl = config['dataverseroot'] + "/api/search?q=" + query + '&key=' + config['key'] + '&type=dataset'
        data = load_dataverse(apiurl)
    return (data, pid, fileid)
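# For reference (inferred from the string concatenation above, values are
# placeholders): metadata() queries the Dataverse search API with a URL of the
# form
#   <dataverseroot>/api/search?q=<pid>&key=<api key>&type=dataset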
def downloadzip(pid):
    DEBUG = 0
    (fullpath) = ('')
    fullmetadata = {}
    logscale = 0

    config = configuration()
    config['remote'] = 'on'
    API_TOKEN = config['key']
    HOSTNAME = config['dataverseroot']
    cmd = "--insecure -u " + API_TOKEN + ": " + HOSTNAME + "/dvn/api/data-deposit/v1.1/swordv2/statement/study/"
    tmpdir = config['tmpdir']
    filerandom = randomword(10)
    #filerandom = '12345'
    arc = "data" + filerandom + ".zip"
    filename = filerandom
    finaldir = config['path'] + '/static/tmp'
    # ToDO
    if filename:
        finaldir = str(finaldir) + '/' + str(filename)
        tmpdir = str(tmpdir) + '/' + str(filename)

    try:
        os.mkdir(tmpdir)
        os.mkdir(finaldir)
    except:
        donothing = 'ok'

    customyear = ''
    fromyear = request.args.get('y[min]')
    toyear = request.args.get('y[max]')
    historical = request.args.get('type[0]')
    (handles, pidslist) = pidfrompanel(pid)
    try:
        if pidslist:
            fullmetadata = load_fullmetadata(pidslist)
    except:
        showwarning = 1

    # Log scales switch
    if request.args.get('logscale'):
        logscale = 1

    # Select countries
    customcountrycodes = ''
    f = request.args
    for key in f.keys():
        if is_location(key):
            for value in sorted(f.getlist(key)):
                customcountrycodes = str(customcountrycodes) + str(value) + ','
    if customcountrycodes:
        customcountrycodes = customcountrycodes[:-1]

    if handles:
        if historical:
            api = config['apiroot'] + "/collabs/static/data/historical.json"
            (regions, countries, ctr2reg, webmapper, geocoder) = histo(api, '')
            hist = countries
        else:
            hist = ''
        (classification, geodataset, title, units) = content2dataframe(config, config['geocoderhandle'])
        #geocoder = buildgeocoder(dataset, config)
        (modern, historical) = loadgeocoder(config, dataset, 'geocoder')
        for handle in handles:
            #if remote:
            #    (class1, dataset) = loaddataset_fromurl(config, handle)
            #else:
            #    dataset = loaddataset(handles)
            #(cfilter, notint) = selectint(activeindex.values)
            #(moderndata, historicaldata) = loadgeocoder(dataset, '')
            # CHANGE
            #return str(dataset.index)
            (header, panelcells, codes, datahub, data, handle2ind, unit2ind, original) = data2panel(
                handles, customcountrycodes, fromyear, toyear, customyear, hist, logscale)
            filename = filename + '.xls'
            fullpath = panel2excel(finaldir, filename, header, panelcells, fullmetadata)
    else:
        # Clio format download
        zipfile = get_papers(HOSTNAME, API_TOKEN, cmd, pid, tmpdir, arc, finaldir)
        (alonepid, revid, cliohandle, clearpid) = findpid(pid)
        if alonepid:
            handles = [clearpid]
        for pid in handles:
            if historical:
                api = config['apiroot'] + "/collabs/static/data/historical.json"
                (regions, countries, ctr2reg, webmapper, geocoder) = histo(api, '')
                hist = countries
            else:
                hist = ''
            filename = filename + '.xls'
            # 2DEBUG
            (header, panelcells, codes, datahub, data, handle2ind, unit2ind, originalvalues) = data2panel(
                handles, customcountrycodes, fromyear, toyear, customyear, hist, logscale)
            #codes = hist
            #return str(fullmetadata)
            metadata = fullmetadata
            result = individual_dataset(finaldir, filename, handle2ind[pid], unit2ind[pid], datahub, data[pid], codes, metadata)

    try:
        for everypid in handles:
            # Download papers
            zipfile = get_papers(HOSTNAME, API_TOKEN, cmd, everypid, tmpdir, arc, finaldir)
    except:
        nopapers = 1

    compile2zip(finaldir, arc)
    filename = arc
    return filename
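# Notes (sketch, inferred from the code above, not from the original sources):
# the cmd string holds curl-style options, so get_papers() presumably ends up
# issuing something like
#   curl --insecure -u <API_TOKEN>: <HOSTNAME>/dvn/api/data-deposit/v1.1/swordv2/statement/study/<pid>
# against the SWORD v2 statement API. The download itself is filtered through
# query parameters such as y[min], y[max], type[0] (historical), logscale and
# any location parameters recognised by is_location().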
def main():
    handle = ''
    dataverse = ''
    customkey = ''
    config = configuration()

    try:
        myopts, args = getopt.getopt(sys.argv[1:], "H:r:d:k:D:")
    except getopt.GetoptError as e:
        print(str(e))
        print("Usage: %s -H <file handle> -r <dataverse handle> -d <dataverse url> -k <api key> -D <dataverse name>" % sys.argv[0])
        sys.exit(2)

    (handle, rhandle, customdv) = ('', '', '')
    for o, a in myopts:
        if o == '-H':
            handle = a
        if o == '-r':
            rhandle = a
        if o == '-d':
            dataverse = a
        if o == '-k':
            customkey = a
        if o == '-D':
            customdv = a

    dataset = {}
    DEBUG = ''
    path = config['path']
    # Default dataverse
    root = config['dataverseroot']
    key = config['key']
    dvname = config['branch']
    if dataverse:
        root = dataverse
    if customkey:
        key = customkey
    if customdv:
        dvname = customdv

    files = []
    if rhandle:
        contentsapi = root + "/api/dataverses/" + dvname + "/contents?key=" + key
        print contentsapi
        newdata = load_api_data(contentsapi, '')
        metadata = newdata['data']
        for item in metadata:
            dv = item['id']
            files = getfiles(root, dv, key)

    if handle:
        print handle
        (datahandle, datasetID, fileID) = parsehandle(handle)
        files.append(fileID)

    for fileID in files:
        fullpath = downloadfile(root, path, fileID, key)
        print fullpath
        (pid, revid, cliohandle, clearpid) = findpid(handle)
        #try:
        if pid:
            handle = pid
            jsonfile = dataextractor(fullpath, path, pid, fileID)
            if jsonfile:
                title = 'Test'
                datasetadd(jsonfile, clearpid, handle, title)
                print handle
                print clearpid
def main():
    handle = ''
    dataverse = ''
    customkey = ''
    config = configuration()

    try:
        myopts, args = getopt.getopt(sys.argv[1:], "H:r:d:k:D:")
    except getopt.GetoptError as e:
        print(str(e))
        print("Usage: %s -H <file handle> -r <dataverse handle> -d <dataverse url> -k <api key> -D <dataverse name>" % sys.argv[0])
        sys.exit(2)

    (handle, rhandle, customdv) = ('', '', '')
    for o, a in myopts:
        if o == '-H':
            handle = a
        if o == '-r':
            rhandle = a
        if o == '-d':
            dataverse = a
        if o == '-k':
            customkey = a
        if o == '-D':
            customdv = a

    dataset = {}
    DEBUG = ''
    path = config['path']
    # Default dataverse
    root = config['dataverseroot']
    key = config['key']
    dvname = config['branch']
    title = 'Title'
    units = 'Units'
    if dataverse:
        root = dataverse
    if customkey:
        key = customkey
    if customdv:
        dvname = customdv

    files = []
    if rhandle:
        contentsapi = root + "/api/dataverses/" + dvname + "/contents?key=" + key
        print contentsapi
        newdata = load_api_data(contentsapi, '')
        metadata = newdata['data']
        for item in metadata:
            dv = item['id']
            files = getfiles(root, dv, key)

    if handle:
        print handle
        (datahandle, datasetID, fileID) = parsehandle(handle)
        files.append(fileID)

    for fileID in files:
        fullpath = downloadfile(root, path, fileID, key)
        print fullpath
        (pid, revid, cliohandle, clearpid) = findpid(handle)
        (jsonfile, csvfile) = ('', '')
        #try:
        if pid:
            handle = pid
            try:
                (jsonfile, csvfile, tmptitle, tmpunits) = dataextractor(fullpath, path, pid, fileID)
            except:
                resultfile = config['tmpdir'] + "/" + fileID
                (jsonfile, csvfile, tmptitle, tmpunits) = excelvalidator(config['phantompath'], fullpath, resultfile, config['tmpdir'])

            if jsonfile:
                remove = removedata('datasets', 'handle', clearpid)
                try:
                    title = str(tmptitle)
                    units = str(tmpunits)
                except:
                    donothing = 1
                print "ADD " + str(jsonfile)
                datasetadd(jsonfile, csvfile, clearpid, handle, title, units, datasetID)
                print handle
                print clearpid
                print datasetID
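# Example invocations (sketch; handles and keys are placeholders). The
# dashboard view calls this script as bin/import.py with -d and -H, e.g.:
#   python import.py -d <dataverse url> -H '<file handle>'                    # import a single file
#   python import.py -r <dataverse handle> -D <dataverse name> -k <api key>   # walk a dataverse's contents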
def dashboard(settings=''):
    activepage = 'Dashboard'
    config = configuration()
    if config['error']:
        return config['error']
    apiroot = config['apiroot']
    dataverseroot = config['dataverseroot']
    key = config['key']
    tabnum = ''
    ctrnum = 0
    if not tabnum:
        tabnum = 3  # tabs with maps is default
    logscale = ''
    if config['perl']:
        perlbin = config['perl'] + ' '
    else:
        perlbin = "/usr/bin/perl "
    path = config['path']

    # Default parameters
    (datatitle, validate, topic, citation, cliopid, stats, fileid, clearpid) = ('', '', '', '', '', '', '', '')
    (handle, fromyear, toyear) = ('', 1500, 2012)
    (selectedcountries, selectedindicators) = ('', '')

    # Variables from dashboard
    varproject = request.args.get('project')
    varbase = request.args.get('base')
    dataset = request.args.get('dataset')

    # Log scales switch
    if request.args.get('logscale'):
        logscale = 1

    if request.args.get('pid'):
        dataset = request.args.get('pid')
    if dataset:
        (handle, revid, cliopid, clearpid) = findpid(dataset)

    action = request.args.get('action')
    year = request.args.get('year')
    hist = request.args.get('hist')
    if request.args.get('yearmin'):
        fromyear = request.args.get('yearmin')
    if request.args.get('yearmax'):
        toyear = request.args.get('yearmax')
    if request.args.get('y[min]'):
        fromyear = request.args.get('y[min]')
    if request.args.get('y[max]'):
        toyear = request.args.get('y[max]')

    # Select countries
    customcountrycodes = ''
    f = request.args
    for key in f.keys():
        if key == 'loc':
            for value in sorted(f.getlist(key)):
                customcountrycodes = str(customcountrycodes) + str(value) + ','
                ctrnum = ctrnum + 1
    if customcountrycodes:
        customcountrycodes = customcountrycodes[:-1]

    # Requests from Drupal
    fromdrupal = 0
    for key in f.keys():
        if is_location(key):
            for value in sorted(f.getlist(key)):
                customcountrycodes = str(customcountrycodes) + str(value) + ','
                fromdrupal = 1
    if fromdrupal:
        if customcountrycodes:
            customcountrycodes = customcountrycodes[:-1]

    template = "dashboard.html"

    # Load topics and locations
    api1 = apiroot + "/collabs/static/data/dataframe100_0.json"
    branch = config['branch']
    indicatorlist = load_alltopics(api1, branch)
    api2 = apiroot + "/collabs/static/data/dataframe94_0.json"
    locations = load_locations(api2)
    api3 = apiroot + "/collabs/static/data/historical.json"
    historical = load_historical(api3)
    if hist:
        locations = historical

    try:
        (dataversemeta, pid, fileid, cliopid) = load_metadata(dataset)
        for item in dataversemeta:
            citation = 'Suggested citation: ' + str(item['citation'])
            datatitle = str(item['name'])
            topic = item['topic']
    except:
        title = dataset

    if action == 'chart':
        template = 'chart.html'
    if action == 'map':
        template = 'worldmap.html'
    if action == 'validate':
        validate = 'yes'
        template = 'validate.html'
        template = 'simplechart.html'
    if action == 'geocoder':
        template = 'geocoder.html'
    if action == 'visualize':
        template = 'navigation.html'

    # DEB
    jsonapi = apiroot + "/collabs/static/data/" + str(pid) + ".json"
    data = createdata(jsonapi)
    d = data.describe()
    show = d.transpose()
    stats = show.to_html()

    benforddata = ''
    datasetfile = ''
    try:
        benforddata = benford(dataset, year, '')
    except:
        donothing = 1

    valtitle = ''
    if validate:
        # VALIDATION
        # Note: this early return leaves the import/update path below unreachable.
        return 'Dataset not updated'
        cmd = path + "/../../bin/import.py -d '" + dataverseroot + "' -H '" + dataset + "'"
        p = Popen(cmd, shell=True, stdin=PIPE, stdout=PIPE, stderr=STDOUT, close_fds=True)
        maincontent = p.communicate()[0]
        valtitle = maincontent
        return 'Dataset updated'

    if benforddata:
        title = benforddata
        datasetfile = benforddata
    else:
        # Clio dataset
        try:
            cmd = perlbin + path + "/../../bin/api2data.pl " + dataset
            p = Popen(cmd, shell=True, stdin=PIPE, stdout=PIPE, stderr=STDOUT, close_fds=True)
            maincontent = p.communicate()[0]
            title = "World Data Strikes, 1927-2009"
            datasetfile = maincontent
        except:
            donothing = 1
            datasetfile = valtitle

    if datatitle:
        title = datatitle

    if customcountrycodes:
        selectedcountries = customcountrycodes
    if dataset:
        selectedindicators = "\"" + dataset + "\""
    cliopids = cliopid

    # Choose tab
    if ctrnum <= 10:
        if ctrnum > 0:
            tabnum = 0

    resp = make_response(render_template(template, active=activepage, pages=pages,
                                         title=title, datasetfile=datasetfile,
                                         dataset=dataset, stats=stats, topic=topic,
                                         citation=citation, cliopid=cliopid,
                                         indicatorlist=indicatorlist,
                                         locations=locations, fromyear=fromyear,
                                         toyear=toyear,
                                         customcountrycodes=customcountrycodes,
                                         handle=handle,
                                         selectedcountries=selectedcountries,
                                         selectedindicators=selectedindicators,
                                         cliopids=cliopids, logscale=logscale,
                                         tabnum=tabnum))
    return resp
def mapslider():
    (title, steps, customcountrycodes, fromyear, toyear, customyear, catmax, histo) = ('', 0, '', '1500', '2012', '', 6, '')
    config = configuration()
    datafilter = {}
    datafilter['ctrlist'] = ''
    datafilter['startyear'] = fromyear
    datafilter['endyear'] = toyear
    if config['error']:
        return config['error']

    handleface = ''
    urlmatch = re.search(r'(.+)\&face', request.url)
    try:
        if urlmatch.group(0):
            thismapurl = urlmatch.group(1)
    except:
        thismapurl = request.url
    thismapurl = thismapurl.replace('http://', 'https://')

    geocoder = ''
    pids = []
    handledataset = ''
    logscale = 0
    handles = []
    datahub = {}
    dataset = ''
    warning = ''
    hist = {}

    if request.args.get('ctrlist'):
        customcountrycodes = ''
        tmpcustomcountrycodes = request.args.get('ctrlist')
        c = tmpcustomcountrycodes.split(',')
        for ids in sorted(c):
            if ids:
                customcountrycodes = str(customcountrycodes) + str(ids) + ','
        customcountrycodes = customcountrycodes[:-1]
        datafilter['ctrlist'] = customcountrycodes

    if request.args.get('dataset'):
        dataset = request.args.get('dataset')
        handles.append(dataset)
    if request.args.get('handle'):
        handledataset = request.args.get('handle')
        try:
            (pids, pidslist) = pidfrompanel(handledataset)
        except:
            nopanel = 'yes'
        handlestring = request.args.get('handle')
        ishandle = re.search(r'(hdl:\d+\/\w+)', handlestring)
        if ishandle:
            handle = ishandle.group(1)
            handle = handle.replace("'", "")
        else:
            handle = handlestring
        (dataset, revid, cliopid, clearpid) = findpid(handle)
        #handles.append(dataset)
        handles.append(handle)
        handleface = handle
    if request.args.get('logscale'):
        logscale = 1
    if request.args.get('catmax'):
        catmax = request.args.get('catmax')
    if request.args.get('yearmin'):
        fromyear = request.args.get('yearmin')
        datafilter['startyear'] = fromyear
    if request.args.get('yearmax'):
        toyear = request.args.get('yearmax')
        datafilter['endyear'] = toyear
    if request.args.get('geocoder'):
        geocoder = request.args.get('geocoder')
    if request.args.get('hist'):
        geocoder = request.args.get('hist')
        histo = 'on'
    if request.args.get('face'):
        handleface = request.args.get('face')
    if handleface:
        handles = []
        handle = handleface
        handles.append(handleface)
        try:
            pids.remove(handleface)
        except:
            nothing = 1

    historical = 0
    hubyears = []
    if config:
        switch = 'modern'
        if histo:
            switch = 'historical'
        (geocoder, geolist, oecd2webmapper, modern, historical) = request_geocoder(config, '')
        (origdata, maindata, metadata) = request_datasets(config, switch, modern, historical, handles, geolist)
        (hubyears, notyears) = selectint(origdata.columns)
        title = metadata[handles[0]]['title']
        for handle in handles:
            (datasubset, ctrlist) = datasetfilter(maindata[handle], datafilter)
            datasubset['handle'] = handle
            if not datasubset.empty:
                datasubset = datasubset.dropna(how='all')
                (allyears, notyears) = selectint(datasubset.columns)
                for year in datasubset:
                    if datasubset[year].count() == 0:
                        datasubset = datasubset.drop(year, axis=1)
                (hubyears, notyears) = selectint(datasubset.columns)

    validyears = []
    lastyear = ''
    for year in sorted(hubyears):
        validyears.append(year)
        lastyear = year
        steps = steps + 1

    handledict = {}
    if pids:
        hquery = formdatasetquery(pids, '')
        d = readdatasets('datasets', json.loads(hquery))
        for x in d:
            thishandle = x['handle']
            handledict[thishandle] = x['title']

    #validyears.reverse()
    return make_response(render_template('mapslider.html', handle=handle,
                                         years=validyears, warning=warning,
                                         steps=steps, title=title, geocoder=histo,
                                         dataset=dataset,
                                         customcountrycodes=customcountrycodes,
                                         catmax=catmax, lastyear=lastyear,
                                         indicators=pids,
                                         thismapurl=thismapurl,
                                         handledict=handledict))
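# Request sketch (parameters taken from the request.args lookups above; the
# route path and handle are placeholders):
#   /mapslider?handle=hdl:12345/ABCDE&yearmin=1500&yearmax=2012&ctrlist=100,250&catmax=6&hist=on
# face=<handle> restricts the slider to a single indicator from a panel, and
# hist switches the geocoder to historical boundaries.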