def import_schemas(self, schema_filepath):
    """Import schemas from a JSON Lines file and create/update them in the db.

    Each non-blank line of *schema_filepath* is a JSON object describing one
    schema. Records with an 'id' are matched by id; otherwise they are matched
    by title. Matched records are updated in place, unmatched ones created.
    Echoes each stored schema back as JSONL (now including ids) so the output
    can be saved and re-imported.

    NOTE(review): assumes the module-level `json` import — confirm it exists
    at the top of this file.
    """
    from ckan import model
    from ckanext.dgu.model.schema_codelist import Schema

    # Load file with schemas - iterate the file object directly rather than
    # f.readlines(), which needlessly materializes the whole file in memory.
    schema_dicts = []
    with open(schema_filepath) as f:
        for line in f:
            if not line.strip():
                continue
            schema_dicts.append(json.loads(line))

    # Create/update in the db
    for schema in schema_dicts:
        if 'id' in schema:
            existing_schema = Schema.get(schema['id'])
        else:
            existing_schema = Schema.by_title(schema['title'])
        if existing_schema:
            schema['id'] = existing_schema.id
            for k, v in schema.items():
                setattr(existing_schema, k, v)
            schema_obj = existing_schema
        else:
            schema_obj = Schema(**schema)
            model.Session.add(schema_obj)
        # Commit per record so the object gets its id before printing.
        model.Session.commit()
        # Print JSONL with ids, in case you want to save with IDs
        # (single-argument print(..) behaves identically on py2 and py3)
        print(json.dumps(schema_obj.as_dict()))
    model.Session.remove()
def schema_codelist_validator(key, data, errors, context):
    """Validate and normalize a list of schema/codelist references.

    The web form supplies IDs, but the API may supply titles or URLs; each
    reference is resolved against the model (by id, then title, then URL)
    and rewritten in place as the canonical id. Raises Invalid for a
    reference that matches nothing.
    """
    from ckanext.dgu.model.schema_codelist import Schema, Codelist

    # Pick the model class once, based on which field is being validated.
    if key == ('schema',):
        model_cls = Schema
    elif key == ('codelist',):
        model_cls = Codelist
    else:
        raise NotImplementedError('Bad key: %s' % key)

    for i, ref in enumerate(data[key]):
        # drop-down has no selection - ignore
        if not ref:
            continue
        # form gives an ID. API might give a title or URL.
        resolved = (model_cls.get(ref) or
                    model_cls.by_title(ref) or
                    model_cls.by_url(ref))
        if not resolved:
            raise Invalid('%s id does not exist: %r' % (key[0], ref))
        # write the ID in case it came in via the API and was a URL or title
        data[key][i] = resolved.id
def add_datasets_to_results(datasets, result):
    """Accumulate dataset names/titles into *result*, recording and (optionally)
    fixing whether the expected schema is applied to each dataset.

    result is a dict with list values under 'dataset_names',
    'dataset_titles' and 'dataset_schema_applied'; datasets already present
    in result['dataset_names'] are skipped.

    NOTE(review): reads module-level globals `schema` (with attribute
    `dgu_schema_name`), `options` (with `.write`), `model`, `Schema`, `json`
    and `log` — confirm these are defined at file scope.
    """
    for dataset in datasets:
        if dataset['name'] not in result['dataset_names']:
            result['dataset_names'].append(dataset['name'])
            result['dataset_titles'].append(dataset['title'])
            # Direct boolean expression instead of the redundant
            # 'True if ... else False' ternary.
            schema_applied = schema.dgu_schema_name in \
                [s['title'] for s in dataset.get('schema', [])]
            result['dataset_schema_applied'].append(schema_applied)
            if not schema_applied and options.write:
                pkg = model.Package.get(dataset['name'])
                schema_obj = Schema.by_title(schema.dgu_schema_name)
                assert schema_obj, schema.dgu_schema_name
                try:
                    schema_ids = json.loads(pkg.extras.get('schema') or '[]')
                except ValueError:
                    log.error('Not valid JSON in schema field: %s %r',
                              dataset['name'], pkg.extras.get('schema'))
                    schema_ids = []
                schema_ids.append(schema_obj.id)
                pkg.extras['schema'] = json.dumps(schema_ids)
def create_test_data(self): from ckan import plugins from ckan import model from ckanext.dgu.model.schema_codelist import Schema, Codelist pt = plugins.toolkit context = {'model': model, 'user': '******'} # Create schemas schemas = [ dict(url='http://lga.org/toilet?v0.3', title='Toilet locations'), dict(url='http://spend.com/25', title='25k Spend'), dict( url='http://environment.data.gov.uk/def/bathing-water-quality/', title='Bathing water quality (ontology)'), dict(url='http://environment.data.gov.uk/def/bathing-water/', title='Bathing water (ontology)'), dict(url='http://environment.data.gov.uk/def/bwq-cc-2012/', title='Bathing water classifications'), dict(url='http://location.data.gov.uk/def/ef/SamplingPoint/', title='Sampling point (environmental monitoring) ontology'), dict(url='http://www.w3.org/2006/time', title='Time (OWL ontology)'), dict(url='http://purl.org/linked-data/cube', title='Data cube (vocabulary)'), dict(url='http://www.w3.org/2004/02/skos/core', title='Simple Knowledge Organization System (SKOS vocabulary)' ), dict(url='http://purl.org/dc/terms/', title='DCMI Metadata Terms (vocabulary)'), dict(url='http://xmlns.com/foaf/0.1/', title='FOAF Vocabulary'), dict(url='http://purl.org/linked-data/sdmx/2009/sdmx-attribute', title='Statistical Data and Metadata Exchange (SDMX)'), dict(url='http://www.w3.org/2003/01/geo/wgs84_pos', title='WGS84 Geo Positioning vocabulary'), dict(url='http://data.ordnancesurvey.co.uk/ontology/geometry/', title='Ordnance Survey Geometry (ontology)'), ] for schema in schemas: existing_schema = Schema.by_title(schema['title']) if existing_schema: schema['id'] = existing_schema.id for k, v in schema.items(): setattr(existing_schema, k, v) else: model.Session.add(Schema(**schema)) model.repo.commit_and_remove() codelists = [ dict( url= 'http://environment.data.gov.uk/registry/def/water-quality/_sampling_point_types', title='Water sampling point types'), dict( url= 
'http://environment.data.gov.uk/registry/def/water-quality/sampling_mechanisms', title='Water quality sampling mechanisms'), ] for codelist in codelists: existing_list = Codelist.by_title(codelist['title']) if existing_list: codelist['id'] = existing_list.id for k, v in codelist.items(): setattr(existing_list, k, v) else: model.Session.add(Codelist(**codelist)) model.repo.commit_and_remove() # Create org org = dict(name='oxford', title='Oxford', type='organization', is_organization=True, category='local-council') existing_org = model.Group.get(org['name']) action = 'create' if not existing_org else 'update' if existing_org: org['id'] = existing_org.id org = pt.get_action('organization_%s' % action)(context, org) # Create datasets defaults = dict(license_id='uk-ogl', owner_org=org['id'], notes='This is a test') datasets = [ dict(name='oxford-toilets', title='Oxford toilets', codelist=[], schema=[Schema.by_title('Toilet locations').id]), dict( name='bathing-waters', title='Bathing waters', codelist=[ Codelist.by_title(title).id for title in ('Water sampling point types', 'Water quality sampling mechanisms') ], schema=[ Schema.by_url(url).id for url in ( 'http://environment.data.gov.uk/def/bathing-water-quality/', 'http://environment.data.gov.uk/def/bathing-water/', 'http://environment.data.gov.uk/def/bwq-cc-2012/', 'http://location.data.gov.uk/def/ef/SamplingPoint/', 'http://www.w3.org/2006/time', 'http://purl.org/linked-data/cube', 'http://www.w3.org/2004/02/skos/core', 'http://purl.org/dc/terms/', 'http://xmlns.com/foaf/0.1/', 'http://purl.org/linked-data/sdmx/2009/sdmx-attribute', 'http://www.w3.org/2003/01/geo/wgs84_pos', 'http://data.ordnancesurvey.co.uk/ontology/geometry/', ) ]) ] for dataset in datasets: dataset.update(defaults) existing_dataset = model.Package.get(dataset['name']) action = 'create' if not existing_dataset else 'update' if existing_dataset: dataset['id'] = existing_dataset.id dataset = pt.get_action('dataset_%s' % action)(context, dataset) 
print 'Datasets: ', ' '.join([dataset['name'] for dataset in datasets])
def create_test_data(self): from ckan import plugins from ckan import model from ckanext.dgu.model.schema_codelist import Schema, Codelist pt = plugins.toolkit context = {'model': model, 'user': '******'} # Create schemas schemas = [dict(url='http://lga.org/toilet?v0.3', title='Toilet locations'), dict(url='http://spend.com/25', title='25k Spend'), dict(url='http://environment.data.gov.uk/def/bathing-water-quality/', title='Bathing water quality (ontology)'), dict(url='http://environment.data.gov.uk/def/bathing-water/', title='Bathing water (ontology)'), dict(url='http://environment.data.gov.uk/def/bwq-cc-2012/', title='Bathing water classifications'), dict(url='http://location.data.gov.uk/def/ef/SamplingPoint/', title='Sampling point (environmental monitoring) ontology'), dict(url='http://www.w3.org/2006/time', title='Time (OWL ontology)'), dict(url='http://purl.org/linked-data/cube', title='Data cube (vocabulary)'), dict(url='http://www.w3.org/2004/02/skos/core', title='Simple Knowledge Organization System (SKOS vocabulary)'), dict(url='http://purl.org/dc/terms/', title='DCMI Metadata Terms (vocabulary)'), dict(url='http://xmlns.com/foaf/0.1/', title='FOAF Vocabulary'), dict(url='http://purl.org/linked-data/sdmx/2009/sdmx-attribute', title='Statistical Data and Metadata Exchange (SDMX)'), dict(url='http://www.w3.org/2003/01/geo/wgs84_pos', title='WGS84 Geo Positioning vocabulary'), dict(url='http://data.ordnancesurvey.co.uk/ontology/geometry/', title='Ordnance Survey Geometry (ontology)'), ] for schema in schemas: existing_schema = Schema.by_title(schema['title']) if existing_schema: schema['id'] = existing_schema.id for k, v in schema.items(): setattr(existing_schema, k, v) else: model.Session.add(Schema(**schema)) model.repo.commit_and_remove() codelists = [ dict(url='http://environment.data.gov.uk/registry/def/water-quality/_sampling_point_types', title='Water sampling point types'), 
dict(url='http://environment.data.gov.uk/registry/def/water-quality/sampling_mechanisms', title='Water quality sampling mechanisms'), ] for codelist in codelists: existing_list = Codelist.by_title(codelist['title']) if existing_list: codelist['id'] = existing_list.id for k, v in codelist.items(): setattr(existing_list, k, v) else: model.Session.add(Codelist(**codelist)) model.repo.commit_and_remove() # Create org org = dict(name='oxford', title='Oxford', type='organization', is_organization=True, category='local-council') existing_org = model.Group.get(org['name']) action = 'create' if not existing_org else 'update' if existing_org: org['id'] = existing_org.id org = pt.get_action('organization_%s' % action)(context, org) # Create datasets defaults = dict(license_id='uk-ogl', owner_org=org['id'], notes='This is a test') datasets = [ dict(name='oxford-toilets', title='Oxford toilets', codelist=[], schema=[Schema.by_title('Toilet locations').id]), dict(name='bathing-waters', title='Bathing waters', codelist=[Codelist.by_title(title).id for title in ('Water sampling point types', 'Water quality sampling mechanisms')], schema=[Schema.by_url(url).id for url in ( 'http://environment.data.gov.uk/def/bathing-water-quality/', 'http://environment.data.gov.uk/def/bathing-water/', 'http://environment.data.gov.uk/def/bwq-cc-2012/', 'http://location.data.gov.uk/def/ef/SamplingPoint/', 'http://www.w3.org/2006/time', 'http://purl.org/linked-data/cube', 'http://www.w3.org/2004/02/skos/core', 'http://purl.org/dc/terms/', 'http://xmlns.com/foaf/0.1/', 'http://purl.org/linked-data/sdmx/2009/sdmx-attribute', 'http://www.w3.org/2003/01/geo/wgs84_pos', 'http://data.ordnancesurvey.co.uk/ontology/geometry/', )] )] for dataset in datasets: dataset.update(defaults) existing_dataset = model.Package.get(dataset['name']) action = 'create' if not existing_dataset else 'update' if existing_dataset: dataset['id'] = existing_dataset.id dataset = pt.get_action('dataset_%s' % action)(context, dataset) 
print 'Datasets: ', ' '.join([dataset['name'] for dataset in datasets])