def opennames_update(org_name, attribute, value):
    '''Overwrite one existing attribute of an opennames entity and save it.

    The entity is looked up by org_name in the 'public-bodies-uk'
    nomenklatura dataset. The attribute must already be present on the
    entity (checked with an assert); it is then set to value, the change is
    printed and the entity's update() is called.
    '''
    dataset = nomenklatura.Dataset('public-bodies-uk')
    entity = dataset.entity_by_name(org_name)

    # Refuse to invent new attribute keys - only existing ones may be changed
    assert attribute in entity.attributes, (
        'Attribute "%s" not in entity: %r - it has: %r'
        % (attribute, entity, entity.attributes))

    entity.attributes[attribute] = value
    print('Writing %s=%s' % (attribute, value))
    entity.update()
def dgu_update(apikey):
    '''Push category changes for every data.gov.uk organisation.

    The list of organisation names is fetched from the data.gov.uk action
    API and each organisation is then fetched in full. Its title is looked
    up in the 'public-bodies-uk' nomenklatura dataset; organisations with no
    match are recorded in the stats and skipped. For the rest,
    dgu_update_category() is called and, if it returns a true value, the
    organisation is written back with organization_update.

    apikey is handed to ckanapi.RemoteCKAN for the write calls.

    A CKAN API error whose text contains '504 Gateway Time-out' is recorded
    in the stats and the loop carries on; any other CKAN API error is
    re-raised.
    '''
    from ckanext.dgu.forms import validators
    import ckanapi

    ckan = ckanapi.RemoteCKAN('http://data.gov.uk',
                              user_agent=__file__,
                              apikey=apikey)
    # validators.categories is iterated as (id, title) pairs; build lookups
    # in both directions
    categories_by_id = dict(validators.categories)
    categories_by_title = dict(
        (title, category_id)
        for category_id, title in validators.categories)
    stats_category = Stats()
    stats_state = Stats()

    # NB Not using all_fields as it doesn't include extras, like category
    list_response = requests.get(
        'http://data.gov.uk/api/action/organization_list')
    org_names = json.loads(list_response.content)['result']

    opennames = nomenklatura.Dataset('public-bodies-uk')
    for org_name in org_names:
        show_response = requests.get(
            'http://data.gov.uk/api/action/organization_show?id=%s'
            % org_name)
        org = json.loads(show_response.content)['result']

        # Flatten the extras from a list of key/value dicts into one dict
        extras_by_key = {}
        for extra in org['extras']:
            extras_by_key[extra['key']] = extra['value']
        org['extras'] = extras_by_key

        try:
            entity = opennames.entity_by_name(org['title'])
        except NoMatch:
            # BTW it hasn't been added for review
            not_found = 'Org not found in nomenklatura'
            print(stats_category.add(not_found, org_name))
            stats_state.add(not_found, org_name)
            continue
        entity = entity.dereference()

        changed_org = dgu_update_category(org_name, org, entity,
                                          stats_category,
                                          categories_by_id,
                                          categories_by_title)
        if changed_org:
            # Expand the extras back into the list-of-dicts form before
            # passing the organisation to organization_update
            extras_list = []
            for key, value in org['extras'].items():
                extras_list.append({'key': key, 'value': value})
            org['extras'] = extras_list
            try:
                org = ckan.action.organization_update(**org)
            except ckanapi.errors.CKANAPIError as err:
                # Only gateway time-outs are tolerated; anything else is
                # a real failure
                if '504 Gateway Time-out' not in str(err):
                    raise
                print(stats_category.add('Time-out writing', org_name))
def opennames_swap_alias(org_name):
    '''Promote the alias entity named org_name to be the canonical entity.

    The entity must currently be an alias (checked with an assert). The
    entity it pointed at, and every other alias of that entity, are
    re-pointed at the promoted entity; the attributes are moved across to
    the promoted entity; then every touched entity is saved with update().

    NB there are issues - see https://github.com/pudo/nomenklatura/issues/35
    '''
    # Gather every entity involved before changing anything
    dataset = nomenklatura.Dataset('public-bodies-uk')
    promoted = dataset.entity_by_name(org_name)
    assert promoted.is_alias
    demoted = promoted.canonical
    sibling_aliases = []
    for alias in demoted.aliases:
        if alias.id != promoted.id:
            sibling_aliases.append(alias)

    # Swap the roles by editing each entity's __data__ dict directly
    demoted.__data__['canonical'] = promoted.__data__
    promoted.__data__['canonical'] = None
    promoted.attributes = demoted.attributes
    demoted.attributes = {}
    for alias in sibling_aliases:
        alias.__data__['canonical'] = promoted.__data__

    # Save: the old canonical first, then the new one, then the rest
    demoted.update()
    promoted.update()
    for alias in sibling_aliases:
        alias.update()
def nk_connect(dataset):
    '''Return the nomenklatura Dataset for the given dataset name.

    Datasets are kept in the NK_DATASETS mapping (defined outside this
    function), so a Dataset object is only constructed the first time a
    given name is requested.
    '''
    if dataset in NK_DATASETS:
        return NK_DATASETS[dataset]

    # NOTE(review): the API key is a literal in source - consider moving it
    # to configuration.
    NK_DATASETS[dataset] = nomenklatura.Dataset(
        dataset, api_key='beaf2ff2-ea94-47c0-942f-1613a09056c2')
    return NK_DATASETS[dataset]
def _merge_org(org_title, attributes, merge_attributes, stats, messages):
    '''Create or update the opennames entity for org_title.

    attributes are set on opennames whenever a value is specified, and
    overwrite whatever opennames holds. merge_attributes are set on
    opennames only when a value is specified and the key is not yet present
    there; if opennames already holds a non-blank value that differs
    (ignoring case), an AttributeConflict is appended to messages and the
    opennames value is left alone.

    If no entity matches org_title, one is created (unreviewed) carrying
    both sets of attributes. If the matching entity's canonical entity has
    already been handled (it is in orgs_processed), nothing is written: the
    repeat is counted in stats and, when its attributes differ from the
    earlier ones, a Message is appended to messages.

    Every outcome is recorded with stats.add() and printed.
    '''
    opennames = nomenklatura.Dataset('public-bodies-uk')

    # Blank values are dropped up front. These are fresh dicts, so the
    # caller's dicts are never modified.
    attributes = dict(
        pair for pair in attributes.items() if pair[1])
    merge_attributes = dict(
        pair for pair in merge_attributes.items() if pair[1])

    try:
        entity = opennames.entity_by_name(org_title)
    except NoMatch:
        # Unknown org: create it with everything we have
        attributes.update(merge_attributes)
        opennames.create_entity(org_title, attributes=attributes,
                                reviewed=False)
        print(stats.add('created', org_title))
        return

    # It exists, but might need its attributes adding/updating.
    # On a key clash the merge_attributes value wins in this combined view.
    all_attributes = dict(attributes)
    all_attributes.update(merge_attributes)

    if entity.is_alias:
        base_entity = entity.canonical
    else:
        base_entity = entity

    # Check we've not done this org before
    if base_entity.name in orgs_processed:
        note = 'DUPLICATE - ignored'
        if entity.is_alias:
            note += ' (alias of "%s")' % entity.canonical.name
        else:
            alias_names = [alias.name for alias in entity.aliases]
            if alias_names:
                note += ' (has alias "%s")' % '", "'.join(alias_names)
        diff = dicts_differences(all_attributes,
                                 orgs_processed[base_entity.name],
                                 ignore_keys=('govuk-id', 'govuk-url'))
        if diff:
            note += ' - differences: %s' % diff
            messages.append(Message(entity.name, note))
            print(stats.add(
                'Repeat org with different attributes - review',
                org_title))
        else:
            print(stats.add(
                'Repeat org with identical attributes - ignored',
                org_title))
        return

    orgs_processed[base_entity.name] = all_attributes

    # All writes go to the canonical entity, never to an alias
    target = base_entity
    needs_update = False

    # Overwriting attributes: write whenever missing or different
    for key, value in attributes.items():
        if key not in target.attributes or \
                target.attributes[key] != value:
            needs_update = True
            target.attributes[key] = value

    # Merging attributes: only fill gaps, report conflicts
    for key, value in merge_attributes.items():
        if key not in target.attributes:
            needs_update = True
            target.attributes[key] = value
            continue
        existing = target.attributes[key]
        if existing and existing.lower() != value.lower():
            messages.append(
                AttributeConflict(org_title, key, existing, value))

    if needs_update:
        target.update()
        outcome = 'updated'
    else:
        outcome = 'unchanged'
    print(truncate(stats.add(outcome, org_title), 78))