def transform():
    """Geo-code representatives that do not yet have a longitude.

    Iterates the distinct ``contact_lon`` + ``KEYS`` combinations of
    ``reg_representative``. Rows whose ``contact_lon`` is already truthy are
    skipped. For the rest, ``geocode`` is called with the row's town, street,
    country and post code; when it returns a result, the geocoder fields are
    copied onto the row under ``contact_*`` names and the row is upserted
    back, matched on ``KEYS``.
    """
    log.info("Geo-coding representatives...")

    # Geocoder fields stored under the same name with a ``contact_`` prefix.
    prefixed_fields = (
        'lon',
        'lat',
        'nuts1',
        'nuts1_label',
        'nuts2',
        'nuts2_label',
        'nuts3',
        'nuts3_label',
    )

    # Materialise the result up front: rows are written back to the same
    # table inside the loop.
    candidates = list(reg_representative.distinct('contact_lon', *KEYS))
    for record in candidates:
        if record.get('contact_lon'):
            # Already geo-coded.
            continue

        location = geocode(city=record.get('contact_town'),
                           street=record.get('contact_street'),
                           country=record.get('contact_country'),
                           postalcode=record.get('contact_post_code'))
        if location is None:
            # No geocoder result; leave the row untouched.
            continue

        record['contact_geoname'] = location.get('display_name')
        for field in prefixed_fields:
            record['contact_' + field] = location.get(field)
        reg_representative.upsert(record, KEYS)
def load_rep(rep):
    """Store one representative together with its nested child records.

    ``rep`` is modified in place: ``etl_id``, ``name`` and
    ``network_extracted`` are set, and the nested entries ``legal_person``,
    ``head_person``, ``action_fields``, ``interests``,
    ``country_of_members``, ``organisations`` and ``fd`` are popped off
    before the remaining mapping is upserted into ``reg_representative``.

    If ``original_name`` is falsy, an error is logged and the function
    returns before any child or representative record is written.

    NOTE(review): ``load_rep`` is defined twice with identical bodies in the
    visible source; confirm which definition is meant to stay.
    """
    # The id depends only on the identification code, so re-loading a
    # representative overwrites its earlier rows.
    etl_id = "%s//ALL" % rep['identification_code']
    rep['etl_id'] = etl_id
    child_base = {
        'representative_etl_id': etl_id,
        'representative_update_date': rep['last_update_date'],
    }

    if not rep['original_name']:
        log.error("Unnamed representative: %r", rep)
        return

    load_person(rep.pop('legal_person'), 'legal', child_base)
    load_person(rep.pop('head_person'), 'head', child_base)

    # (key in rep, target table, column holding each list item)
    simple_children = (
        ('action_fields', reg_action_field, 'action_field'),
        ('interests', reg_interest, 'interest'),
        ('country_of_members', reg_country_of_member, 'country'),
    )
    for source_key, table, column in simple_children:
        for value in rep.pop(source_key):
            record = dict(child_base)
            record[column] = value
            table.upsert(record, ['representative_etl_id', column])

    for organisation in rep.pop('organisations'):
        record = dict(child_base)
        record.update(organisation)
        # The stripped name is part of the upsert key.
        record['name'] = organisation['name'].strip()
        reg_organisation.upsert(record, ['representative_etl_id', 'name'])

    load_finances(rep.pop('fd'), child_base)

    rep['name'] = rep['original_name'].strip()
    rep['network_extracted'] = False
    log.info("Representative: %s", rep['name'])
    reg_representative.upsert(rep, ['etl_id'])
def load_rep(rep):
    """Persist a representative and the records nested inside it.

    Sets ``etl_id``, ``name`` and ``network_extracted`` on ``rep``, pops the
    nested ``legal_person``, ``head_person``, ``action_fields``,
    ``interests``, ``country_of_members``, ``organisations`` and ``fd``
    entries (writing each through its own loader or table), and finally
    upserts what is left of ``rep`` into ``reg_representative`` keyed on
    ``etl_id``.

    A representative whose ``original_name`` is falsy is logged as an error
    and nothing is written for it.

    NOTE(review): ``load_rep`` is defined twice with identical bodies in the
    visible source; confirm which definition is meant to stay.
    """
    etl_id = "%s//ALL" % rep['identification_code']
    rep['etl_id'] = etl_id
    parent_ref = {
        'representative_etl_id': etl_id,
        'representative_update_date': rep['last_update_date'],
    }

    def store_child(table, column, value):
        # One child row: parent reference plus a single value column,
        # matched on (representative_etl_id, column).
        child = dict(parent_ref)
        child[column] = value
        table.upsert(child, ['representative_etl_id', column])

    if not rep['original_name']:
        log.error("Unnamed representative: %r", rep)
        return

    load_person(rep.pop('legal_person'), 'legal', parent_ref)
    load_person(rep.pop('head_person'), 'head', parent_ref)

    for action_field in rep.pop('action_fields'):
        store_child(reg_action_field, 'action_field', action_field)

    for interest in rep.pop('interests'):
        store_child(reg_interest, 'interest', interest)

    for country in rep.pop('country_of_members'):
        store_child(reg_country_of_member, 'country', country)

    for organisation in rep.pop('organisations'):
        child = dict(parent_ref)
        child.update(organisation)
        # Overwrite the name with its stripped form; it is an upsert key.
        child['name'] = organisation['name'].strip()
        reg_organisation.upsert(child, ['representative_etl_id', 'name'])

    load_finances(rep.pop('fd'), parent_ref)

    display_name = rep['original_name'].strip()
    rep['name'] = display_name
    rep['network_extracted'] = False
    log.info("Representative: %s", rep['name'])
    reg_representative.upsert(rep, ['etl_id'])
def code_subcategories():
    """Write ``sub_category_id`` for every distinct ``sub_category``.

    Each distinct ``sub_category`` value in ``reg_representative`` is looked
    up in ``SUBCATEGORIES`` with ``.get`` and the result is upserted back,
    matched on ``sub_category``. A value missing from the mapping therefore
    gets whatever ``.get`` returns for it rather than raising.
    """
    # Materialise first: the loop writes to the table it is reading from.
    distinct_rows = list(reg_representative.distinct('sub_category'))
    for row in distinct_rows:
        label = row['sub_category']
        row['sub_category_id'] = SUBCATEGORIES.get(label)
        reg_representative.upsert(row, ['sub_category'])
def code_categories():
    """Write ``main_category_id`` for every distinct ``main_category``.

    Each distinct ``main_category`` value in ``reg_representative`` is looked
    up in ``CATEGORIES`` by subscript and the result is upserted back,
    matched on ``main_category``.

    The lookup is strict: a value absent from ``CATEGORIES`` propagates the
    lookup error (``KeyError``, assuming ``CATEGORIES`` is a plain dict).
    """
    # Materialise first: the loop writes to the table it is reading from.
    distinct_rows = list(reg_representative.distinct('main_category'))
    for row in distinct_rows:
        label = row['main_category']
        row['main_category_id'] = CATEGORIES[label]
        reg_representative.upsert(row, ['main_category'])