def opennames_update(org_name, attribute, value):
    '''
    Set one existing attribute on an opennames entity and write it back.

    Looks up the entity named org_name in the 'public-bodies-uk' nomenklatura
    dataset, sets entity.attributes[attribute] to value and calls
    entity.update().

    Raises AssertionError if the entity does not already have the attribute,
    so that a mistyped attribute name cannot silently create a new one.
    '''
    opennames = nomenklatura.Dataset('public-bodies-uk')
    entity = opennames.entity_by_name(org_name)
    # Explicit raise rather than an `assert` statement so that the guard is
    # still enforced when Python runs with -O (which strips asserts).
    if attribute not in entity.attributes:
        raise AssertionError(
            'Attribute "%s" not in entity: %r - it has: %r' %
            (attribute, entity, entity.attributes))
    entity.attributes[attribute] = value
    print('Writing %s=%s' % (attribute, value))
    entity.update()
def dgu_update(apikey):
    '''
    Update organisations on data.gov.uk from their opennames entities.

    For every organisation listed by data.gov.uk, the organisation is fetched,
    the nomenklatura entity matching its title is looked up and dereferenced,
    and dgu_update_category() is called with both. If that returns a true
    value the organisation is written back with organization_update, using
    apikey for authentication.

    Organisations with no matching entity are recorded in the stats and
    skipped. A '504 Gateway Time-out' when writing is recorded and skipped;
    any other CKANAPIError is re-raised.
    '''
    from ckanext.dgu.forms import validators
    import ckanapi
    dgu = ckanapi.RemoteCKAN('http://data.gov.uk',
                             user_agent=__file__,
                             apikey=apikey)
    dgu_categories = dict(validators.categories)
    dgu_categories_by_title = dict(
        (title, category_id) for category_id, title in validators.categories)
    stats_category = Stats()
    stats_state = Stats()
    org_names_request = requests.get(
        'http://data.gov.uk/api/action/organization_list')
    # NB Not using all_fields as it doesn't include extras, like category
    org_names = json.loads(org_names_request.content)['result']
    opennames = nomenklatura.Dataset('public-bodies-uk')
    for org_name in org_names:
        # Pass the id via params so that it is URL-encoded properly rather
        # than being pasted into the query string as-is.
        org_request = requests.get(
            'http://data.gov.uk/api/action/organization_show',
            params={'id': org_name})
        org = json.loads(org_request.content)['result']
        # convert the extras into a dict
        org['extras'] = dict(
            (extra['key'], extra['value']) for extra in org['extras'])
        try:
            entity = opennames.entity_by_name(org['title'])
        except NoMatch:
            # BTW it hasn't been added for review
            msg = 'Org not found in nomenklatura'
            print(stats_category.add(msg, org_name))
            stats_state.add(msg, org_name)
            continue
        entity = entity.dereference()
        changed_org = dgu_update_category(org_name, org, entity,
                                          stats_category, dgu_categories,
                                          dgu_categories_by_title)
        if not changed_org:
            continue
        # convert the extras back into a list of dicts
        org['extras'] = [{
            'key': key,
            'value': value
        } for key, value in org['extras'].items()]
        try:
            dgu.action.organization_update(**org)
        except ckanapi.errors.CKANAPIError as e:
            # A gateway time-out is noted and the run carries on with the
            # next organisation; anything else is a real failure.
            if '504 Gateway Time-out' in str(e):
                print(stats_category.add('Time-out writing', org_name))
            else:
                raise
def opennames_swap_alias(org_name):
    '''
    Make the alias called org_name the canonical entity of its group.

    The entity that was canonical, and every other alias of it, are
    re-pointed at the promoted entity. The attributes move from the old
    canonical entity to the promoted one, and all the entities involved are
    then written back with update().

    Asserts that org_name refers to an alias.
    '''
    # NB there are issues - see https://github.com/pudo/nomenklatura/issues/35
    dataset = nomenklatura.Dataset('public-bodies-uk')
    promoted = dataset.entity_by_name(org_name)
    assert promoted.is_alias
    demoted = promoted.canonical

    # Collect the sibling aliases before any canonical link is changed
    siblings = []
    for alias in demoted.aliases:
        if alias.id != promoted.id:
            siblings.append(alias)

    # swap the aliases
    demoted.__data__['canonical'] = promoted.__data__
    promoted.__data__['canonical'] = None
    promoted.attributes = demoted.attributes
    demoted.attributes = {}
    for sibling in siblings:
        sibling.__data__['canonical'] = promoted.__data__

    # write: old canonical first, then the new one, then the other aliases
    demoted.update()
    promoted.update()
    for sibling in siblings:
        sibling.update()
# Exemplo n.º 4
# 0
def nk_connect(dataset):
    '''
    Return the nomenklatura Dataset for the given dataset name, creating it
    on first use and keeping it in NK_DATASETS for later calls.
    '''
    if dataset in NK_DATASETS:
        return NK_DATASETS[dataset]
    # NOTE(review): the API key is hard-coded in source - consider moving it
    # to configuration and rotating it.
    connection = nomenklatura.Dataset(
        dataset, api_key='beaf2ff2-ea94-47c0-942f-1613a09056c2')
    NK_DATASETS[dataset] = connection
    return NK_DATASETS[dataset]
def _merge_org(org_title, attributes, merge_attributes, stats, messages):
    '''
    Create or update the opennames entity for org_title.

    attributes are set on opennames if there is a value specified (will
    overwrite on opennames).  merge_attributes are set on opennames if there is
    a value specified and no value exists on opennames (a blank existing value
    counts as no value). If a different value exists on opennames already
    then this is noted in messages as an AttributeConflict; the comparison is
    case-insensitive.

    If no entity matches org_title, one is created (unreviewed) carrying both
    sets of attributes.

    If the entity, or the canonical entity it is an alias of, is already in
    orgs_processed then nothing is written. When the attributes differ from
    the ones recorded there (ignoring govuk-id and govuk-url) a Message is
    appended to messages for review.

    The outcome is recorded in stats and printed. Returns None.
    '''
    opennames = nomenklatura.Dataset('public-bodies-uk')

    # remove blank attributes
    attributes = dict((k, v) for k, v in attributes.items() if v)
    merge_attributes = dict((k, v) for k, v in merge_attributes.items() if v)
    # Combined view; on a key clash the merge_attributes value wins
    all_attributes = dict(attributes)
    all_attributes.update(merge_attributes)

    try:
        entity = opennames.entity_by_name(org_title)
    except NoMatch:
        opennames.create_entity(org_title,
                                attributes=all_attributes,
                                reviewed=False)
        print(stats.add('created', org_title))
        return

    # It exists, but might need its attributes adding/updating

    # Check we've not done this org before
    base_entity = entity.canonical if entity.is_alias else entity
    if base_entity.name in orgs_processed:
        msg = 'DUPLICATE - ignored'
        if entity.is_alias:
            msg += ' (alias of "%s")' % entity.canonical.name
        else:
            aliases = [e.name for e in entity.aliases]
            if aliases:
                msg += ' (has alias "%s")' % '", "'.join(aliases)
        diff = dicts_differences(all_attributes,
                                 orgs_processed[base_entity.name],
                                 ignore_keys=('govuk-id', 'govuk-url'))
        if not diff:
            print(stats.add(
                'Repeat org with identical attributes - '
                'ignored', org_title))
            return
        msg += ' - differences: %s' % diff
        messages.append(Message(entity.name, msg))
        print(stats.add('Repeat org with different attributes - '
                        'review', org_title))
        return
    orgs_processed[base_entity.name] = all_attributes

    # Attributes live on the canonical entity, so that is the one to edit
    entity = base_entity

    needs_update = False
    for key, value in attributes.items():
        if key not in entity.attributes or \
                entity.attributes[key] != value:
            needs_update = True
            entity.attributes[key] = value
    for key, value in merge_attributes.items():
        existing = entity.attributes.get(key)
        if not existing:
            # Missing or blank on opennames, so fill it in. Previously a key
            # that was present with a blank value was never filled.
            needs_update = True
            entity.attributes[key] = value
        elif existing.lower() != value.lower():
            messages.append(
                AttributeConflict(org_title, key, existing, value))

    if needs_update:
        entity.update()
        print(truncate(stats.add('updated', org_title), 78))
    else:
        print(truncate(stats.add('unchanged', org_title), 78))