Пример #1
0
    def export(self, meta):
        """Save the indicator described by ``meta`` and its extracted values.

        ``meta`` is a dict. The keys ``indID``, ``fieldname`` and ``unit`` are
        required; ``period`` and ``is_number`` are optional. When ``period``
        is missing or falsy it is derived with ``get_period`` from the field
        name; when ``is_number`` is missing (or None) it defaults to True.

        One Indicator is saved, then one Value per item returned by
        ``self.extract(meta['fieldname'])`` that has both a region and a
        truthy value. Items without a region are logged and skipped.
        """
        indicator = {
            'indID': meta['indID'],
            'name': self.name_for_fieldname(meta['fieldname']),
            'units': meta['unit']
        }
        Indicator(**indicator).save()

        # `meta.get('is_number') or True` was always True, so an explicit
        # False could never be stored. Only fall back when nothing was given.
        is_number = meta.get('is_number')
        if is_number is None:
            is_number = True

        for item in self.extract(meta['fieldname']):
            if not item.get('region'):
                logging.warning("No region in {}".format(meta))
                continue
            value = {
                # dsID is not defined in this method; it must come from the
                # enclosing module scope.
                'dsID': dsID,
                'region': item['region'],
                'period': meta.get('period') or get_period(meta['fieldname']),
                'value': item['value'],
                'indID': meta['indID'],
                'source': self.url,
                'is_number': is_number
            }
            # NOTE(review): a value of 0 (or '') is falsy and is skipped here;
            # confirm that dropping zero values is intended.
            if value['value']:
                print(value)
                Value(**value).save()
    # NOTE(review): this is the tail of a function whose header is not part of
    # this excerpt; `table`, `indname` and `value` are defined above it.

    # Select the cell(s) whose content equals 'Indicator Code'.
    code = table.filter(equal_to('Indicator Code'))

    # Everything to the right of that cell; each of these cells is used as the
    # period of a value below.
    years = code.fill(xypath.RIGHT)
    # Iterated as (indicator cell, year cell, value cell) triples.
    junction = indname.junction(years)
    for ind_cell, year_cell, value_cell in junction:
        # Start from a copy of the shared `value` template so that per-cell
        # fields do not leak between iterations.
        vdict = dict(value)
        vdict['indID'] = ind_cell.value
        vdict['period'] = year_cell.value
        vdict['value'] = value_cell.value

        indicator = {'indID': vdict['indID']}
        # Split an id of the form "name(units)" into its two parts. The name
        # is not stripped, so whitespace before the "(" is kept.
        nameunits = re.search('(.*)\((.*)\)', vdict['indID'])
        if nameunits:
            (indicator['name'], indicator['units']) = nameunits.groups()
        else:
            # No parenthesised part: use the whole id as the name and 'uno'
            # as the units.
            indicator['name'] = vdict['indID']
            indicator['units'] = 'uno'
        # The indicator is saved on every iteration, before its value.
        Indicator(**indicator).save()
        v = Value(**vdict)
        # Only values that are not blank (per Value.is_blank) are stored.
        if not v.is_blank():
            v.save()
    # Print how many Value rows carry the dsID 'World Bank', then commit.
    print len(session.query(Value).filter(Value.dsID == 'World Bank').all())
    session.commit()

for country in getcountrylist():
    try:
        getcountry(country)
    except Exception, e:
        print country, e
        raise
Пример #3
0
# Describe the 'unterm' dataset (scrape time taken from orm.now()) and save it.
dataset_data = dict(
    dsID='unterm',
    last_updated="",
    last_scraped=orm.now(),
    name='unterm'
)
DataSet(**dataset_data).save()

# Build one indicator record per entry of `indicators`; ids are prefixed with
# 'unterm:' and units are left empty. All records are built before any is
# saved.
indicator_data = []
for i in indicators:
    indicator_data.append({
        'indID': 'unterm:' + i,
        'name': i,
        'units': ''
    })
for db_row in indicator_data:
    Indicator(**db_row).save()

# Field reference:
#   Value:     dsID, region, indID, period, value, source, is_number
#   DataSet:   dsID, last_updated, last_scraped, name
#   Indicator: indID, name, units

# Constant Value fields for this dataset.
# NOTE(review): presumably merged into every Value record; the code that uses
# it is outside this excerpt.
value_static = dict(dsID='unterm', period='', is_number=False)


def country_urls():
    """POST the UNTERM search form and parse the returned HTML.

    The form fields come from ``rawformdata``: one ``key:value`` pair per
    line, split at the first colon. Links in the parsed document are made
    absolute relative to the search URL.

    NOTE(review): nothing is returned in the visible code; the function may
    continue beyond this excerpt.
    """
    url = 'http://unterm.un.org/DGAACS/unterm.nsf/0/$searchForm?SearchView=&Seq=1'

    formdata = {}
    for h in rawformdata.split('\n'):
        key, _, val = h.partition(':')
        formdata[key] = val

    response = requests.post(url, data=formdata)
    html = response.content
    root = lxml.html.fromstring(html)
    root.make_links_absolute(url)
Пример #4
0
def parse_rank(socrata_id, countries, period=2012):
    """Yield one HDI-rank value record for each country that has a rank.

    Args:
        socrata_id: Identifier substituted into ``data_url`` to build the
            ``source`` field of every record.
        countries: Iterable of dicts. Entries without an ``'hdi_rank'`` key
            are skipped; the others must also provide ``'country'``.
        period: Period stored on every record. Defaults to 2012, the value
            that used to be hard coded here.

    Yields:
        dict with the keys dsID, region, period, value, indID, source and
        is_number. ``value`` is ``int(country['hdi_rank'])``.

    ``dsID`` and ``data_url`` are not defined in this function; they are read
    from the enclosing module.
    """
    for country in countries:
        if 'hdi_rank' not in country:
            continue
        yield {
            "dsID": dsID,
            "region": country['country'],
            "period": period,
            "value": int(country['hdi_rank']),
            "indID": "PSE220",
            "source": data_url.format(socrata_id),
            "is_number": True,
        }
		   
                           
# Save the dataset, then each indicator listed in `lookup` followed by its
# values.
DataSet(**dataset).save()
maxdate = None
for socrata_code in lookup:
    ind = get_metadata(socrata_code)
    Indicator(**ind).save()
    for value in get_numbers(socrata_code):
        Value(**value).save()

# The HDI rank is stored as its own indicator (PSE220) with its own values.
print("rank")
ind = dict(indID="PSE220", name="HDI Rank", units="rank")
Indicator(**ind).save()
for rank in get_rank("u2dx-y6wx"):
    Value(**rank).save()


Пример #5
0
def getindicator(ind="100106", overridefunction=None):
    """Scrape one HDRStats indicator page and store its indicator and values.

    Args:
        ind: Indicator id as a string. It is substituted into the download
            URL and, prefixed with "HDR:", used as the stored indicator id.
        overridefunction: Optional callable returning ``(html, baseurl)``.
            When given, its result is used instead of downloading the page.

    Side effects:
        Saves the "HDR:HDI Rank" indicator, the "HDRStats" dataset, the
        scraped indicator, one HDI-rank Value per country and one Value per
        country/year cell, then commits ``session``. Cells whose stripped
        text is ".." are not saved. Progress is printed to stdout.

    Raises:
        ValueError: If the page does not hold exactly one non-empty piece of
            text following a ``<br>``, or that text does not contain exactly
            one "Accessed:...from" match.
        IndexError: If the page has no table or no ``<h2>`` text.
    """
    if overridefunction:
        html, baseurl = overridefunction()
    else:
        baseurl = 'http://hdrstats.undp.org/en/indicators/display_cf_xls_indicator.cfm?indicator_id=%s&lang=en' % ind
        html = requests.get(baseurl).content

    # Template copied for every stored value; region, period and value are
    # filled in per cell below.
    value = {
        'dsID': 'HDRStats',
        'indID': "HDR:" + ind,
        'source': baseurl,
        'is_number': True
    }

    dataset = {
        'dsID': 'HDRStats',
        'last_scraped': orm.now(),
        'name': 'Human Development Indicators, UNDP'
    }

    indicator = {'indID': "HDR:" + ind}
    hdi_indicator = {
        'indID': 'HDR:HDI Rank',
        'name': 'Human Development Index rank',
        'units': ''
    }
    Indicator(**hdi_indicator).save()

    # A leftover debug `print html` / `exit(3)` used to stop the process here,
    # which made all of the parsing and saving below unreachable.
    htmlio = StringIO.StringIO(html)
    messy = messytables.html.HTMLTableSet(htmlio)
    table = xypath.Table.from_messy(list(messy.tables)[0])
    root = lxml.html.fromstring(html)

    # Indicator name and units: the last <h2> text, split as "name (units)".
    # Without a parenthesised part the whole text is the name and the units
    # are empty.
    indicator_text = root.xpath("//h2/text()")[-1]
    print(indicator_text)
    try:
        indicator_split, = re.findall(r"(.*)\(([^\(\)]+)\)", indicator_text)
    except ValueError:
        indicator_split = [indicator_text, ""]
    indicator['name'], indicator['units'] = indicator_split
    indicator['name'] = indicator['name'].strip()

    # Update time: the single non-empty text that follows a <br>. Testing the
    # tail directly (instead of str(tail) != "None") avoids encoding the text,
    # which fails on non-ASCII unicode under Python 2.
    access_text, = [
        x.tail.strip() for x in root.xpath("//br")
        if x.tail is not None and x.tail.strip()
    ]
    access_date_raw, = re.findall('Accessed:(.*)from', access_text)
    dataset['last_updated'] = dateutil.parser.parse(
        access_date_raw).isoformat()
    print("%s %s * %s" % (dataset['last_updated'], indicator['name'],
                          indicator['units']))

    # Save the dataset only now that last_updated is known; saving it before
    # the date was parsed meant last_updated was never stored.
    DataSet(**dataset).save()
    Indicator(**indicator).save()

    country_cell = table.filter("Country").assert_one()
    years = country_cell.fill(xypath.RIGHT).filter(lambda b: b.value != '')
    countries = country_cell.fill(xypath.DOWN)
    hdi_rank = table.filter("HDI Rank").assert_one()

    # HDI rank: one value per country, stored under its own indicator id.
    for country, _, rank_cell in countries.junction(hdi_rank):
        newvalue = dict(value)
        newvalue['indID'] = "HDR:HDI Rank"
        newvalue['region'] = get_region(country)
        newvalue['value'] = rank_cell.value.strip()
        # TODO: hard coded for now because the year the rank pertains to is
        # not clear.
        newvalue['period'] = 2012
        if newvalue['value'] != '..':
            Value(**newvalue).save()

    # Indicator values: one per country/year cell.
    for country, year_cell, value_cell in countries.junction(years):
        newvalue = dict(value)
        newvalue['region'] = get_region(country)
        newvalue['value'] = value_cell.value.strip()
        newvalue['period'] = year_cell.value.strip()
        if newvalue['value'] != '..':
            Value(**newvalue).save()
        # Printed for every cell, including the ".." ones that are skipped.
        print(newvalue)
    session.commit()