def transform():
    """Geo-code representative rows that have no longitude stored yet.

    Walks the distinct combinations of ``contact_lon`` and the ``KEYS``
    columns in ``reg_representative``. Rows with a truthy ``contact_lon``
    are skipped; for the rest ``geocode`` is called with the contact town,
    street, country and post code read from the row. If ``geocode`` returns
    something other than ``None``, its display name, coordinates and
    nuts1-3 codes/labels are copied onto the row and the row is upserted
    on ``KEYS``.
    """
    log.info("Geo-coding representatives...")

    # (row column, geocoder result key), in the order the columns are written.
    geo_columns = (
        ('contact_geoname', 'display_name'),
        ('contact_lon', 'lon'),
        ('contact_lat', 'lat'),
        ('contact_nuts1', 'nuts1'),
        ('contact_nuts1_label', 'nuts1_label'),
        ('contact_nuts2', 'nuts2'),
        ('contact_nuts2_label', 'nuts2_label'),
        ('contact_nuts3', 'nuts3'),
        ('contact_nuts3_label', 'nuts3_label'),
    )

    # The result set is materialised before the loop starts; presumably so
    # the upserts below do not interfere with an open query -- TODO confirm.
    pending = list(reg_representative.distinct('contact_lon', *KEYS))
    for record in pending:
        if record.get('contact_lon'):
            continue
        # NOTE(review): the address fields are only present on the row if
        # KEYS contains them -- verify against the KEYS definition.
        location = geocode(
            city=record.get('contact_town'),
            street=record.get('contact_street'),
            country=record.get('contact_country'),
            postalcode=record.get('contact_post_code'),
        )
        if location is None:
            continue
        for column, result_key in geo_columns:
            record[column] = location.get(result_key)
        reg_representative.upsert(record, KEYS)
# Example no. 2
# 0
def transform():
    """Add geo information to representatives whose longitude is empty.

    For every distinct (``contact_lon``, *``KEYS``) row of
    ``reg_representative`` without a truthy ``contact_lon``, the contact
    address fields are passed to ``geocode``. A non-``None`` result is
    copied into the ``contact_*`` columns listed below and the row is
    upserted using ``KEYS`` as the match columns. Rows for which
    ``geocode`` returns ``None`` are left untouched.
    """
    log.info("Geo-coding representatives...")

    # Target column on the row paired with the key read from the geocoder
    # result; the sequence fixes the order in which columns are assigned.
    field_map = [
        ('contact_geoname', 'display_name'),
        ('contact_lon', 'lon'),
        ('contact_lat', 'lat'),
        ('contact_nuts1', 'nuts1'),
        ('contact_nuts1_label', 'nuts1_label'),
        ('contact_nuts2', 'nuts2'),
        ('contact_nuts2_label', 'nuts2_label'),
        ('contact_nuts3', 'nuts3'),
        ('contact_nuts3_label', 'nuts3_label'),
    ]

    # list(...) reads the whole query result before any upsert is issued.
    for rep_row in list(reg_representative.distinct('contact_lon', *KEYS)):
        already_coded = rep_row.get('contact_lon')
        if not already_coded:
            town = rep_row.get('contact_town')
            street = rep_row.get('contact_street')
            country = rep_row.get('contact_country')
            post_code = rep_row.get('contact_post_code')
            result = geocode(city=town, street=street,
                             country=country, postalcode=post_code)
            if result is not None:
                for target, source in field_map:
                    rep_row[target] = result.get(source)
                reg_representative.upsert(rep_row, KEYS)
# Example no. 3
# 0
def load_rep(rep):
    """Store one representative together with its child records.

    ``rep`` is mutated in place: ``etl_id`` is set, the child collections
    (``legal_person``, ``head_person``, ``action_fields``, ``interests``,
    ``country_of_members``, ``organisations``, ``fd``) are popped off and
    written to their own tables or handed to ``load_person`` /
    ``load_finances``, and ``name`` / ``network_extracted`` are added before
    the remaining dict is upserted into ``reg_representative`` on ``etl_id``.

    If ``original_name`` is falsy an error is logged and nothing is stored
    (``etl_id`` has already been set on ``rep`` at that point).
    """
    # The key uses a fixed "//ALL" suffix. The commented-out code this
    # replaces built the key from the identification code plus the
    # last-update date instead.
    rep_key = "%s//ALL" % rep['identification_code']
    rep['etl_id'] = rep_key

    # Columns shared by every child record of this representative.
    shared = {
        'representative_etl_id': rep_key,
        'representative_update_date': rep['last_update_date'],
    }

    if not rep['original_name']:
        log.error("Unnamed representative: %r", rep)
        return

    for person_key, role in (('legal_person', 'legal'), ('head_person', 'head')):
        load_person(rep.pop(person_key), role, shared)

    # Child lists of plain values: (key on rep, column name, target table).
    value_children = (
        ('action_fields', 'action_field', reg_action_field),
        ('interests', 'interest', reg_interest),
        ('country_of_members', 'country', reg_country_of_member),
    )
    for source_key, column, table in value_children:
        for value in rep.pop(source_key):
            child = shared.copy()
            child[column] = value
            table.upsert(child, ['representative_etl_id', column])

    # Organisations arrive as dicts; their fields are merged over the shared
    # columns and the name is stored stripped of surrounding whitespace.
    for org in rep.pop('organisations'):
        child = shared.copy()
        child.update(org)
        child['name'] = org['name'].strip()
        reg_organisation.upsert(child, ['representative_etl_id', 'name'])

    load_finances(rep.pop('fd'), shared)

    display_name = rep['original_name'].strip()
    rep['name'] = display_name
    rep['network_extracted'] = False
    log.info("Representative: %s", display_name)
    reg_representative.upsert(rep, ['etl_id'])
def load_rep(rep):
    """Upsert a representative and everything hanging off it.

    Steps, in order:

    1. Derive ``etl_id`` from ``identification_code`` and store it on ``rep``.
    2. Bail out with a logged error when ``original_name`` is falsy.
    3. Pass the popped ``legal_person`` / ``head_person`` entries to
       ``load_person``.
    4. Upsert one row per popped action field, interest, member country and
       organisation, each carrying the representative's etl id and update
       date.
    5. Pass the popped ``fd`` entry to ``load_finances``.
    6. Set ``name`` (stripped ``original_name``) and
       ``network_extracted = False`` on what is left of ``rep`` and upsert
       it into ``reg_representative`` keyed on ``etl_id``.
    """
    # Fixed "//ALL" suffix rather than a per-update suffix (the previous,
    # commented-out variant appended the last-update date).
    etl_id = "%s//ALL" % rep['identification_code']
    rep['etl_id'] = etl_id
    base = {'representative_etl_id': etl_id,
            'representative_update_date': rep['last_update_date']}

    def upsert_values(source_key, column, table):
        # One child row per value popped from rep[source_key].
        for item in rep.pop(source_key):
            row = base.copy()
            row[column] = item
            table.upsert(row, ['representative_etl_id', column])

    if not rep['original_name']:
        log.error("Unnamed representative: %r", rep)
        return

    load_person(rep.pop('legal_person'), 'legal', base)
    load_person(rep.pop('head_person'), 'head', base)

    upsert_values('action_fields', 'action_field', reg_action_field)
    upsert_values('interests', 'interest', reg_interest)
    upsert_values('country_of_members', 'country', reg_country_of_member)

    # Organisation entries are dicts merged over the base columns; the
    # stored name is the whitespace-stripped one.
    for org_data in rep.pop('organisations'):
        row = base.copy()
        row.update(org_data)
        row['name'] = org_data['name'].strip()
        reg_organisation.upsert(row, ['representative_etl_id', 'name'])

    load_finances(rep.pop('fd'), base)

    rep['name'] = rep['original_name'].strip()
    rep['network_extracted'] = False
    log.info("Representative: %s", rep['name'])
    reg_representative.upsert(rep, ['etl_id'])
def code_subcategories():
    """Attach a ``sub_category_id`` to every distinct ``sub_category``.

    The id is looked up with ``SUBCATEGORIES.get``, so a sub-category with
    no entry in the mapping gets ``None``. Each result is upserted into
    ``reg_representative`` matching on ``sub_category``.
    """
    # Read all distinct values before issuing any upsert.
    distinct_rows = list(reg_representative.distinct('sub_category'))
    for entry in distinct_rows:
        label = entry['sub_category']
        entry['sub_category_id'] = SUBCATEGORIES.get(label)
        reg_representative.upsert(entry, ['sub_category'])
def code_categories():
    """Attach a ``main_category_id`` to every distinct ``main_category``.

    The id is read with a plain subscript on ``CATEGORIES``; unlike
    ``code_subcategories`` there is no ``.get`` fallback, so a category
    missing from the mapping raises (``KeyError`` if ``CATEGORIES`` is a
    plain dict). Each result is upserted into ``reg_representative``
    matching on ``main_category``.
    """
    # Read all distinct values before issuing any upsert.
    distinct_rows = list(reg_representative.distinct('main_category'))
    for entry in distinct_rows:
        label = entry['main_category']
        entry['main_category_id'] = CATEGORIES[label]
        reg_representative.upsert(entry, ['main_category'])