def convert_file(filename):
    """Parse ``filename`` and convert every ``commitment`` element in it.

    Each element is passed to ``convert_commitment`` together with a
    one-argument callback that forwards a record to ``write_row`` along
    with the engine and the shared provenance dict.
    """
    doc = etree.parse(filename)
    engine = util.make_engine()
    base = {'source_file': filename, 'source_id': 0}

    def write(r):
        # ``base`` is mutated in place below, so the callback always sees
        # the provenance of the commitment currently being converted.
        return write_row(engine, base, r)

    for index, commitment in enumerate(doc.findall('//commitment')):
        base['source_line'] = commitment.sourceline
        base['source_contract_id'] = index
        convert_commitment(commitment, write)
def merge():
    """Attach canonical beneficiary information to the ``fts`` table.

    For every distinct ``(beneficiary, country_code)`` pair in the table,
    ``lookup`` is asked for a canonical name, a URI and a score. These are
    stored in the ``beneficiary_canonical``, ``beneficiary_uri`` and
    ``beneficiary_score`` columns via an upsert keyed on that same pair.
    """
    engine = util.make_engine()
    table = sl.get_table(engine, 'fts')
    for row in sl.distinct(engine, table, 'beneficiary', 'country_code'):
        canonical, uri, score = lookup(row.get('beneficiary'),
                                       row.get('country_code'),
                                       engine)
        row['beneficiary_canonical'] = canonical
        row['beneficiary_uri'] = uri
        row['beneficiary_score'] = score
        # Key the upsert on the columns this row was selected by. 'country'
        # is not one of them, so keying on it could not match the rows the
        # pair came from.
        # NOTE(review): assumes sl.distinct returns only the requested
        # columns -- confirm against the sqlaload version in use.
        sl.upsert(engine, table, row, ['beneficiary', 'country_code'])
def merge():
    """Annotate each distinct ``country`` in the ``fts`` table.

    After ``read_countries()`` has run, every distinct country value is
    passed to ``match``; the ``iso_3166-1_2`` and ``common`` entries of the
    result are stored as ``country_code`` and ``country_common`` and
    upserted back keyed on ``country``.
    """
    read_countries()
    engine = util.make_engine()
    table = sl.get_table(engine, 'fts')
    for row in sl.distinct(engine, table, 'country'):
        info = match(row.get('country'))
        code = info.get('iso_3166-1_2')
        common_name = info.get('common')
        row['country_code'] = code
        row['country_common'] = common_name
        sl.upsert(engine, table, row, ['country'])
def merge():
    """Geocode the distinct ``KEYS`` combinations of the ``fts`` table.

    Rows for which ``geocode`` returns ``None`` are skipped. Otherwise the
    geocoding result and the output of ``find_region`` are merged into the
    row, the outcome is logged, and the row is upserted keyed on ``KEYS``.
    """
    def level_three_only(r):
        # Restrict the shapefile records to those with STAT_LEVL_ == 3.
        return r['STAT_LEVL_'] == 3

    geocoder = shapegeocode.geocoder(
        'nuts2-shapefile/data/NUTS_RG_10M_2006.shp',
        filter=level_three_only)
    regions = load_region_hierarchy()
    engine = util.make_engine()
    table = sl.get_table(engine, 'fts')
    for row in sl.distinct(engine, table, *KEYS):
        location = geocode(row)
        if location is None:
            continue
        row.update(location)
        row.update(find_region(geocoder, regions, row))
        log.info("Geocoded: %s/%s - %s",
                 row['lat'], row['lon'], row.get('nuts3_label'))
        sl.upsert(engine, table, row, KEYS)
def merge(codes):
    """Copy budget code metadata from ``codes`` into the ``fts`` table.

    For each level in ``LEVELS`` the distinct values of the source column
    (``budget_code`` for the ``item`` level, otherwise the level name) are
    looked up in ``codes``. Values missing from ``codes`` are printed and
    skipped; ``item`` values shorter than 11 characters are skipped
    silently. Matches get ``<level>_name``, ``<level>_label``,
    ``<level>_description`` and ``<level>_legal_basis`` columns, upserted
    keyed on the source column.

    :param codes: mapping from code value to a mapping with ``label``,
        ``description`` and ``legal_basis`` entries.
    """
    engine = util.make_engine()
    table = sl.get_table(engine, 'fts')
    for level in LEVELS:
        if level == 'item':
            column = 'budget_code'
        else:
            column = level
        for row in sl.distinct(engine, table, column):
            value = row[column]
            if level == 'item' and len(value) < 11:
                continue
            if value not in codes:
                # Report values that have no entry in ``codes``.
                print(value)
                continue
            entry = codes.get(value)
            row['%s_name' % level] = value
            row['%s_label' % level] = entry['label']
            row['%s_description' % level] = entry['description']
            row['%s_legal_basis' % level] = entry['legal_basis']
            sl.upsert(engine, table, row, [column])