Example #1
import sys
import os
import re
import simplejson
import pandas as pd
from datetime import datetime
sys.path.append(
    os.path.abspath(os.path.join(os.path.dirname(__file__), '../modules')))
from config import configuration, dataverse2indicators, load_dataverse, findpid, load_metadata
from datacompiler import dataframe_compiler
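
# Timing harness: compile the historical dataset behind handle
# hdl:10622/SO62N5 with dataframe_compiler and report the elapsed time.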

config = configuration()
config['remote'] = 'on'
datafilter = {}
datafilter['startyear'] = '1500'
datafilter['endyear'] = '2010'
datafilter['ctrlist'] = ''
#datafilter['ctrlist'] = '528,14,18,67'
handle = 'hdl:10622/SO62N5'
switch = 'historical'
fullpath = '/home/dpe/tmp'

a = datetime.now()
(filetitle, fullpath,
 finalsubset) = dataframe_compiler(config, fullpath, handle, switch,
                                   datafilter)
b = datetime.now()
d = b - a
print "Time: " + str(d.seconds) + " seconds"
#print finalsubset.ix[67][1831]
Example #2
def download():
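    """Assemble the requested dataset(s) into a zip archive and redirect to its URL.

    Assumes the Flask request/redirect globals and the project helpers
    (randomword, is_location, pidfrompanel, get_papers, build_panel,
    panel2excel, compile2zip, downloadzip, dataframe_compiler) are imported
    at module level; they are not defined in this snippet.
    """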
    (classification, pid, root, switch, datafile) = ('modern', '', '',
                                                     'modern', '')
    handle = ''
    config = configuration()
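    # curl-style arguments for the Dataverse SWORD v2 "statement" endpoint,
    # authenticated with the configured API key (presumably consumed by get_papers).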
    cmd = "--insecure -u " + config['key'] + ": " + config[
        'dataverseroot'] + "/dvn/api/data-deposit/v1.1/swordv2/statement/study/"

    config['remote'] = ''
    datafilter = {}
    datafilter['startyear'] = '1500'
    datafilter['endyear'] = '2010'
    datafilter['ctrlist'] = ''

    tmpdir = config['tmpdir']
    filerandom = randomword(10)
    #filerandom = '12345'
    arc = "data" + filerandom + ".zip"
    filename = filerandom
    finaldir = config['path'] + '/static/tmp'
    # TODO
    if filename:
        finaldir = str(finaldir) + '/' + str(filename)
        tmpdir = str(tmpdir) + '/' + str(filename)

    try:
        os.mkdir(tmpdir)
        os.mkdir(finaldir)
    except OSError:
        # directories may already exist
        pass

    if request.args.get('handle'):
        handle = request.args.get('handle')
    if request.args.get('type[0]') == 'historical':
        classification = request.args.get('type[0]')
        switch = classification
    if request.args.get('y[min]'):
        datafilter['startyear'] = request.args.get('y[min]')
    if request.args.get('y[max]'):
        datafilter['endyear'] = request.args.get('y[max]')

    # Select countries
    customcountrycodes = ''
    f = request.args
    for key in f.keys():
        if is_location(key):
            for value in sorted(f.getlist(key)):
                customcountrycodes = str(customcountrycodes) + str(value) + ','
    if customcountrycodes:
        customcountrycodes = customcountrycodes[:-1]
        datafilter['ctrlist'] = customcountrycodes

    if request.args.get('ctrlist'):
        datafilter['ctrlist'] = request.args.get('ctrlist')

    if request.args.get('pid'):
        pid = request.args.get('pid')
        ispanel = ''
        try:
            (pids, pidslist) = pidfrompanel(pid)
            handles = pids
            handle = pids[0]
            match = re.match(r'Panel\[(.+)\]', pid)
            if match:
                ispanel = 'yes'
        except:
            handles = pid
            handle = pid

        if ispanel:
            dirforzip = ''
            for handle in handles:
                dirforzip = get_papers(config['dataverseroot'], config['key'],
                                       cmd, handle, tmpdir, arc, finaldir)

            (header, panelcells, metadata,
             totalpanel) = build_panel(config, switch, handles, datafilter)
            filename = "paneldata.xlsx"
            metadata = []
            datadir = config['webtest']
            localoutfile = panel2excel(dirforzip, filename, header, panelcells,
                                       metadata)
            arc = 'dataarchive.zip'
            compile2zip(dirforzip, arc)
            root = config['apiroot'] + "/collabs/static/tmp/" + str(arc)
            return redirect(root, code=301)

    if classification:
        outfile = "clioinfra.xlsx"
        dirforzip = get_papers(config['dataverseroot'], config['key'], cmd,
                               handle, tmpdir, arc, finaldir)
        #fullpath = config['webtest'] + "/" + str(outfile)
        fullpath = dirforzip

        # Check selection
        isselection = 'yes'
        if datafilter['startyear'] == '1500':
            if datafilter['ctrlist'] == '':
                isselection = 'yes'

        if isselection:
            (datafile, outfilefinal,
             finalsubset) = dataframe_compiler(config, fullpath, handle,
                                               classification, datafilter)
            #return datafile.to_html()
        else:
            # Copy original dataset
            source = os.listdir(tmpdir)
            for excelfile in source:
                shutil.copy(tmpdir + '/' + excelfile, dirforzip)

        #return outfilefinal
        arc = 'dataarchive.zip'
        if datafile:
            arc = "%s_%s.zip" % (datafile, switch)
        compile2zip(dirforzip, arc)
        root = config['apiroot'] + "/collabs/static/tmp/" + str(arc)
        #root = config['apiroot'] + "/collabs/static/tmp/" + str(outfile)
        return redirect(root, code=301)
    else:
        zipfile = downloadzip(pid)
        # CHANGE
        #return zipfile
        # DEBUG1
        root = config['apiroot'] + "/collabs/static/tmp/" + zipfile
        # HTML
        #resp = make_response(render_template('progress.html', download=root))
        #return "<a href=\"" + str(root) + "\">Download dataset(s) with all papers (zip archive)</a>"
        #return resp
        return redirect(root, code=301)
Example #3
def download():
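    """Assemble the requested dataset(s) into a zip archive and redirect to it (same view as Example #2)."""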
    (classification, pid, root, switch, datafile) = ('modern', '', '', 'modern', '')
    handle = ''
    config = configuration()
    cmd = "--insecure -u " + config['key'] + ": " + config['dataverseroot'] + "/dvn/api/data-deposit/v1.1/swordv2/statement/study/"

    config['remote'] = ''
    datafilter = {}
    datafilter['startyear'] = '1500'
    datafilter['endyear'] = '2010'
    datafilter['ctrlist'] = ''

    tmpdir = config['tmpdir']
    filerandom = randomword(10)
    #filerandom = '12345'
    arc = "data" + filerandom + ".zip"
    filename = filerandom
    finaldir = config['path'] + '/static/tmp'
    # TODO
    if filename:
        finaldir = str(finaldir) + '/' + str(filename)
        tmpdir = str(tmpdir) + '/' + str(filename)

    try:
        os.mkdir(tmpdir)
        os.mkdir(finaldir)
    except OSError:
        # directories may already exist
        pass

    if request.args.get('handle'):
        handle = request.args.get('handle')
    if request.args.get('type[0]') == 'historical':
        classification = request.args.get('type[0]')
        switch = classification
    if request.args.get('y[min]'):
        datafilter['startyear'] = request.args.get('y[min]')
    if request.args.get('y[max]'):
        datafilter['endyear'] = request.args.get('y[max]')

    # Select countries
    customcountrycodes = ''
    f = request.args
    for key in f.keys():
        if is_location(key):
            for value in sorted(f.getlist(key)):
                customcountrycodes = str(customcountrycodes) + str(value) + ','
    if customcountrycodes:
        customcountrycodes = customcountrycodes[:-1]
        datafilter['ctrlist'] = customcountrycodes

    if request.args.get('ctrlist'):
        datafilter['ctrlist'] = request.args.get('ctrlist')

    if request.args.get('pid'):
        pid = request.args.get('pid')
        ispanel = ''
        try:
            (pids, pidslist) = pidfrompanel(pid)
            handles = pids
            handle = pids[0]
            match = re.match(r'Panel\[(.+)\]', pid)
            if match:
                ispanel = 'yes'
        except:
            handles = pid
            handle = pid

        if ispanel:
            dirforzip = ''
            for handle in handles:
                dirforzip = get_papers(config['dataverseroot'], config['key'], cmd, handle, tmpdir, arc, finaldir)

            (header, panelcells, metadata, totalpanel) = build_panel(config, switch, handles, datafilter)
            filename = "paneldata.xlsx"
            metadata = []
            datadir = config['webtest']
            localoutfile = panel2excel(dirforzip, filename, header, panelcells, metadata)
            arc = 'dataarchive.zip'
            compile2zip(dirforzip, arc)
            root = config['apiroot'] + "/collabs/static/tmp/" + str(arc)
            return redirect(root, code=301)

    if classification:
	outfile = "clioinfra.xlsx"
	dirforzip = get_papers(config['dataverseroot'], config['key'], cmd, handle, tmpdir, arc, finaldir)
	#fullpath = config['webtest'] + "/" + str(outfile)
	fullpath = dirforzip

	# Check selection
	isselection = 'yes'
	if datafilter['startyear'] == '1500':
	    if datafilter['ctrlist'] == '':
		isselection = 'yes'

	if isselection:
	    (datafile, outfilefinal, finalsubset) = dataframe_compiler(config, fullpath, handle, classification, datafilter)
	    #return datafile.to_html()
	else:
	    # Copy original dataset
	    source = os.listdir(tmpdir)
	    for excelfile in source:
        	shutil.copy(tmpdir + '/' + excelfile, dirforzip)

	#return outfilefinal
        arc = 'dataarchive.zip'
  	if datafile:
	    arc = "%s_%s.zip" % (datafile, switch)
        compile2zip(dirforzip, arc)
        root = config['apiroot'] + "/collabs/static/tmp/" + str(arc)
	#root = config['apiroot'] + "/collabs/static/tmp/" + str(outfile)
	return redirect(root, code=301)
    else:
        zipfile = downloadzip(pid)
        # CHANGE
        #return zipfile
        # DEBUG1
        root = config['apiroot'] + "/collabs/static/tmp/" + zipfile
        # HTML
        #resp = make_response(render_template('progress.html', download=root))
        #return "<a href=\"" + str(root) + "\">Download dataset(s) with all papers (zip archive)</a>"
        #return resp
        return redirect(root, code=301)
Example #4
#!/usr/bin/python
import json
import sys
import os
import re
import simplejson
import pandas as pd
from datetime import datetime
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '../modules')))
from config import configuration, dataverse2indicators, load_dataverse, findpid, load_metadata
from datacompiler import dataframe_compiler
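
# Same timing harness as Example #1: compile the historical dataset for
# hdl:10622/SO62N5 and print the elapsed time.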

config = configuration()
config['remote'] = 'on'
datafilter = {}
datafilter['startyear'] = '1500'
datafilter['endyear'] = '2010'
datafilter['ctrlist'] = ''
#datafilter['ctrlist'] = '528,14,18,67'
handle = 'hdl:10622/SO62N5'
switch = 'historical'
fullpath = '/home/dpe/tmp'

a = datetime.now()
(filetitle, fullpath, finalsubset) = dataframe_compiler(config, fullpath, handle, switch, datafilter)
b = datetime.now()
d = b - a
print "Time: " + str(d.seconds) + " seconds"
#print finalsubset.ix[67][1831]