Exemplo n.º 1
0
    def import_codelists(self, codelist_filepath):
        """Create or update Codelist records from a JSON Lines file.

        Every non-blank line of the file is parsed as one JSON object whose
        keys are used as Codelist attributes. Each record is committed and
        then printed to stdout as JSON, so the output can be saved as a copy
        of the input that carries the ids.

        :param codelist_filepath: path of the JSON Lines file to import
        """
        from ckan import model
        from ckanext.dgu.model.schema_codelist import Codelist

        # Load file with codelists, skipping blank lines
        with open(codelist_filepath) as f:
            codelist_dicts = [json.loads(line) for line in f if line.strip()]

        # Create/update in the db
        for codelist in codelist_dicts:
            # Prefer matching on the id when the record carries one, so a
            # file saved with ids can change a title. Fall back to the title
            # when there is no id or the id is not known.
            existing_codelist = None
            if 'id' in codelist:
                existing_codelist = Codelist.get(codelist['id'])
            if not existing_codelist:
                existing_codelist = Codelist.by_title(codelist['title'])

            if existing_codelist:
                codelist['id'] = existing_codelist.id
                for k, v in codelist.items():
                    setattr(existing_codelist, k, v)
                codelist_obj = existing_codelist
            else:
                codelist_obj = Codelist(**codelist)
                model.Session.add(codelist_obj)
            model.Session.commit()
            # Print JSONL with ids, in case you want to save with IDs
            print(json.dumps(codelist_obj.as_dict()))
        model.Session.remove()
Exemplo n.º 2
0
    def import_codelists(self, codelist_filepath):
        """Import codelists from a file holding one JSON object per line.

        Blank lines are ignored. For each object an existing Codelist is
        updated if one matches, otherwise a new Codelist is added. The
        session is committed after each record and the stored record is
        printed as a JSON line.

        :param codelist_filepath: path of the file to read
        """
        from ckan import model
        from ckanext.dgu.model.schema_codelist import Codelist

        # Load file with codelists
        codelist_dicts = []
        with open(codelist_filepath) as f:
            for line in f:
                if line.strip():
                    codelist_dicts.append(json.loads(line))

        # Create/update in the db
        for codelist in codelist_dicts:
            # An explicit id identifies the record to update; the title is
            # only used when no id is given or the id matches nothing.
            existing_codelist = (Codelist.get(codelist['id'])
                                 if 'id' in codelist else None)
            if not existing_codelist:
                existing_codelist = Codelist.by_title(codelist['title'])

            if existing_codelist:
                codelist['id'] = existing_codelist.id
                for k, v in codelist.items():
                    setattr(existing_codelist, k, v)
                codelist_obj = existing_codelist
            else:
                codelist_obj = Codelist(**codelist)
                model.Session.add(codelist_obj)
            model.Session.commit()
            # Print JSONL with ids, in case you want to save with IDs
            print(json.dumps(codelist_obj.as_dict()))
        model.Session.remove()
Exemplo n.º 3
0
    def add_schema(cls, pkg_dict):
        """Add schema and code list titles to ``pkg_dict``.

        ``pkg_dict['schema']`` and ``pkg_dict['codelist']`` are read as
        JSON-encoded lists of ids (a missing or empty value means no ids).
        The title of each Schema / Codelist found is collected and the lists
        are stored in ``pkg_dict['schema_multi']`` and
        ``pkg_dict['codelist_multi']``. Invalid JSON and unknown ids are
        logged and skipped rather than raised.

        :param pkg_dict: package dict, modified in place
        """
        from ckanext.dgu.model.schema_codelist import Schema, Codelist
        try:
            schema_ids = json.loads(pkg_dict.get('schema') or '[]')
        except ValueError:
            log.error('Not valid JSON in schema field: %s %r',
                      pkg_dict.get('name'), pkg_dict.get('schema'))
            # The loop below runs regardless, so it needs an iterable
            schema_ids = []
        schemas = []
        for schema_id in schema_ids:
            try:
                schemas.append(Schema.get(schema_id).title)
            except AttributeError as e:
                # Schema.get() gave nothing with a title for this id
                log.error('Invalid schema_id: %r %s', schema_id, e)
        pkg_dict['schema_multi'] = schemas
        log.debug('Schema: %s', ' '.join(schemas))

        try:
            codelist_ids = json.loads(pkg_dict.get('codelist') or '[]')
        except ValueError:
            log.error('Not valid JSON in codelists field: %s %r',
                      pkg_dict.get('name'), pkg_dict.get('codelist'))
            codelist_ids = []
        codelists = []
        for codelist_id in codelist_ids:
            try:
                codelists.append(Codelist.get(codelist_id).title)
            except AttributeError as e:
                log.error('Invalid codelist_id: %r %s', codelist_id, e)
        pkg_dict['codelist_multi'] = codelists
        log.debug('Code lists: %s', ' '.join(codelists))
Exemplo n.º 4
0
    def add_schema(cls, pkg_dict):
        """Fill in ``schema_multi`` and ``codelist_multi`` on ``pkg_dict``.

        The ``schema`` and ``codelist`` values of ``pkg_dict`` are parsed as
        JSON lists of ids; each id is looked up and the titles found are
        written back as lists under ``schema_multi`` and ``codelist_multi``.
        A value that is not valid JSON is logged and treated as an empty
        list; an id that cannot be resolved is logged and left out.

        :param pkg_dict: package dict, modified in place
        """
        from ckanext.dgu.model.schema_codelist import Schema, Codelist

        try:
            schema_ids = json.loads(pkg_dict.get('schema') or '[]')
        except ValueError:
            log.error('Not valid JSON in schema field: %s %r',
                      pkg_dict.get('name'), pkg_dict.get('schema'))
            # Keep going with no ids instead of leaving the name unbound
            schema_ids = []
        schemas = []
        for schema_id in schema_ids:
            try:
                schemas.append(Schema.get(schema_id).title)
            except AttributeError as e:
                log.error('Invalid schema_id: %r %s', schema_id, e)
        pkg_dict['schema_multi'] = schemas
        log.debug('Schema: %s', ' '.join(schemas))

        try:
            codelist_ids = json.loads(pkg_dict.get('codelist') or '[]')
        except ValueError:
            log.error('Not valid JSON in codelists field: %s %r',
                      pkg_dict.get('name'), pkg_dict.get('codelist'))
            # Same fallback as for schemas above
            codelist_ids = []
        codelists = []
        for codelist_id in codelist_ids:
            try:
                codelists.append(Codelist.get(codelist_id).title)
            except AttributeError as e:
                log.error('Invalid codelist_id: %r %s', codelist_id, e)
        pkg_dict['codelist_multi'] = codelists
        log.debug('Code lists: %s', ' '.join(codelists))
Exemplo n.º 5
0
def schema_codelist_validator(key, data, errors, context):
    """Resolve each schema/codelist reference in ``data[key]`` to its id.

    Every non-empty entry is looked up, in order, as an id, a title and a
    URL. The entry is then replaced in place by the id of the object found.

    :param key: ``('schema',)`` or ``('codelist',)``
    :param data: dict in which ``data[key]`` is the list of references
    :param errors: unused
    :param context: unused
    :raises NotImplementedError: if ``key`` is neither of the supported keys
        and there is at least one non-empty reference to resolve
    :raises Invalid: if a reference matches no id, title or URL
    """
    from ckanext.dgu.model.schema_codelist import Schema, Codelist
    for i, schema_ref in enumerate(data[key]):
        if not schema_ref:
            # drop-down has no selection - ignore
            continue
        if key == ('schema',):
            domain_class = Schema
        elif key == ('codelist',):
            domain_class = Codelist
        else:
            # key is a tuple: wrap it so that %-formatting treats it as one
            # value instead of unpacking it (which raises TypeError for any
            # key that is not a 1-tuple).
            raise NotImplementedError('Bad key: %s' % (key,))
        # form gives an ID. API might give a title.
        obj = (domain_class.get(schema_ref) or
               domain_class.by_title(schema_ref) or
               domain_class.by_url(schema_ref))
        if not obj:
            raise Invalid('%s id does not exist: %r' % (key[0], schema_ref))
        # write the ID in case it came in via the API and was a URL or title
        data[key][i] = obj.id
Exemplo n.º 6
0
def id_to_dict(key, data, errors, context):
    """Replace each id in ``data[key]`` with the dict form of its object.

    :param key: ``('schema',)`` or ``('codelist',)``
    :param data: dict in which ``data[key]`` is a list of ids; each entry
        is overwritten with ``obj.as_dict()``
    :param errors: unused
    :param context: unused
    :raises NotImplementedError: if ``key`` is neither of the supported keys
        and ``data[key]`` is not empty
    :raises Invalid: if an id matches no object
    """
    from ckanext.dgu.model.schema_codelist import Schema, Codelist
    for i, id_ in enumerate(data[key]):
        if key == ('schema',):
            obj = Schema.get(id_)
        elif key == ('codelist',):
            obj = Codelist.get(id_)
        else:
            # Wrap the tuple key so %-formatting does not unpack it (which
            # raises TypeError for any key that is not a 1-tuple).
            raise NotImplementedError('Bad key: %s' % (key,))
        if not obj:
            # key[0] gives 'schema' / 'codelist' rather than the tuple repr
            raise Invalid('%s id does not exist: %s' % (key[0], id_))
        data[key][i] = obj.as_dict()
Exemplo n.º 7
0
    @classmethod
    def add_schema(cls, pkg_dict):
        """Add schema and code list titles to ``pkg_dict``.

        ``pkg_dict['schema']`` and ``pkg_dict['codelist']`` are read as
        JSON-encoded lists of ids (a missing or empty value means no ids).
        The titles of the objects found are stored as lists in
        ``pkg_dict['schema_multi']`` and ``pkg_dict['codelist_multi']``.
        Invalid JSON and unknown ids are logged and skipped.

        :param pkg_dict: package dict, modified in place
        """
        from ckanext.dgu.model.schema_codelist import Schema, Codelist
        try:
            schema_ids = json.loads(pkg_dict.get('schema') or '[]')
        except ValueError:
            log.error('Not valid JSON in schema field: %s %r',
                      pkg_dict.get('name'), pkg_dict.get('schema'))
            # Must stay iterable: the loop below runs regardless
            schema_ids = []
        schemas = []
        for schema_id in schema_ids:
            try:
                schemas.append(Schema.get(schema_id).title)
            except AttributeError as e:
                log.error('Invalid schema_id: %r %s', schema_id, e)
        pkg_dict['schema_multi'] = schemas
        #log.debug('Schema: %s', ' '.join(schemas))

        try:
            codelist_ids = json.loads(pkg_dict.get('codelist') or '[]')
        except ValueError:
            log.error('Not valid JSON in codelists field: %s %r',
                      pkg_dict.get('name'), pkg_dict.get('codelist'))
            codelist_ids = []
        codelists = []
        for codelist_id in codelist_ids:
            # Same tolerance for unknown ids as for schemas above
            try:
                codelists.append(Codelist.get(codelist_id).title)
            except AttributeError as e:
                log.error('Invalid codelist_id: %r %s', codelist_id, e)
        pkg_dict['codelist_multi'] = codelists
        #log.debug('Code lists: %s', ' '.join(codelists))
Exemplo n.º 8
0
    @classmethod
    def add_schema(cls, pkg_dict):
        """Fill in ``schema_multi`` and ``codelist_multi`` on ``pkg_dict``.

        The ``schema`` and ``codelist`` values of ``pkg_dict`` are parsed as
        JSON lists of ids; each id is looked up and the titles found are
        written back as lists under ``schema_multi`` and ``codelist_multi``
        (and logged at debug level). A value that is not valid JSON is
        logged and treated as an empty list; an id that cannot be resolved
        is logged and left out.

        :param pkg_dict: package dict, modified in place
        """
        from ckanext.dgu.model.schema_codelist import Schema, Codelist

        try:
            schema_ids = json.loads(pkg_dict.get('schema') or '[]')
        except ValueError:
            log.error('Not valid JSON in schema field: %s %r',
                      pkg_dict.get('name'), pkg_dict.get('schema'))
            # An empty list keeps the loop below valid (None is not iterable)
            schema_ids = []
        schemas = []
        for schema_id in schema_ids:
            try:
                schemas.append(Schema.get(schema_id).title)
            except AttributeError as e:
                log.error('Invalid schema_id: %r %s', schema_id, e)
        pkg_dict['schema_multi'] = schemas
        log.debug('Schema: %s', ' '.join(schemas))

        try:
            codelist_ids = json.loads(pkg_dict.get('codelist') or '[]')
        except ValueError:
            log.error('Not valid JSON in codelists field: %s %r',
                      pkg_dict.get('name'), pkg_dict.get('codelist'))
            # Bind the name the loop uses, not the result list
            codelist_ids = []
        codelists = []
        for codelist_id in codelist_ids:
            try:
                codelists.append(Codelist.get(codelist_id).title)
            except AttributeError as e:
                log.error('Invalid codelist_id: %r %s', codelist_id, e)
        pkg_dict['codelist_multi'] = codelists
        log.debug('Code lists: %s', ' '.join(codelists))
Exemplo n.º 9
0
    def create_test_data(self):
        from ckan import plugins
        from ckan import model
        from ckanext.dgu.model.schema_codelist import Schema, Codelist
        pt = plugins.toolkit
        context = {'model': model, 'user': '******'}

        # Create schemas
        schemas = [
            dict(url='http://lga.org/toilet?v0.3', title='Toilet locations'),
            dict(url='http://spend.com/25', title='25k Spend'),
            dict(
                url='http://environment.data.gov.uk/def/bathing-water-quality/',
                title='Bathing water quality (ontology)'),
            dict(url='http://environment.data.gov.uk/def/bathing-water/',
                 title='Bathing water (ontology)'),
            dict(url='http://environment.data.gov.uk/def/bwq-cc-2012/',
                 title='Bathing water classifications'),
            dict(url='http://location.data.gov.uk/def/ef/SamplingPoint/',
                 title='Sampling point (environmental monitoring) ontology'),
            dict(url='http://www.w3.org/2006/time',
                 title='Time (OWL ontology)'),
            dict(url='http://purl.org/linked-data/cube',
                 title='Data cube (vocabulary)'),
            dict(url='http://www.w3.org/2004/02/skos/core',
                 title='Simple Knowledge Organization System (SKOS vocabulary)'
                 ),
            dict(url='http://purl.org/dc/terms/',
                 title='DCMI Metadata Terms (vocabulary)'),
            dict(url='http://xmlns.com/foaf/0.1/', title='FOAF Vocabulary'),
            dict(url='http://purl.org/linked-data/sdmx/2009/sdmx-attribute',
                 title='Statistical Data and Metadata Exchange (SDMX)'),
            dict(url='http://www.w3.org/2003/01/geo/wgs84_pos',
                 title='WGS84 Geo Positioning vocabulary'),
            dict(url='http://data.ordnancesurvey.co.uk/ontology/geometry/',
                 title='Ordnance Survey Geometry (ontology)'),
        ]
        for schema in schemas:
            existing_schema = Schema.by_title(schema['title'])
            if existing_schema:
                schema['id'] = existing_schema.id
                for k, v in schema.items():
                    setattr(existing_schema, k, v)
            else:
                model.Session.add(Schema(**schema))
            model.repo.commit_and_remove()

        codelists = [
            dict(
                url=
                'http://environment.data.gov.uk/registry/def/water-quality/_sampling_point_types',
                title='Water sampling point types'),
            dict(
                url=
                'http://environment.data.gov.uk/registry/def/water-quality/sampling_mechanisms',
                title='Water quality sampling mechanisms'),
        ]
        for codelist in codelists:
            existing_list = Codelist.by_title(codelist['title'])
            if existing_list:
                codelist['id'] = existing_list.id
                for k, v in codelist.items():
                    setattr(existing_list, k, v)
            else:
                model.Session.add(Codelist(**codelist))
            model.repo.commit_and_remove()

        # Create org
        org = dict(name='oxford',
                   title='Oxford',
                   type='organization',
                   is_organization=True,
                   category='local-council')
        existing_org = model.Group.get(org['name'])
        action = 'create' if not existing_org else 'update'
        if existing_org:
            org['id'] = existing_org.id
        org = pt.get_action('organization_%s' % action)(context, org)

        # Create datasets
        defaults = dict(license_id='uk-ogl',
                        owner_org=org['id'],
                        notes='This is a test')
        datasets = [
            dict(name='oxford-toilets',
                 title='Oxford toilets',
                 codelist=[],
                 schema=[Schema.by_title('Toilet locations').id]),
            dict(
                name='bathing-waters',
                title='Bathing waters',
                codelist=[
                    Codelist.by_title(title).id
                    for title in ('Water sampling point types',
                                  'Water quality sampling mechanisms')
                ],
                schema=[
                    Schema.by_url(url).id for url in (
                        'http://environment.data.gov.uk/def/bathing-water-quality/',
                        'http://environment.data.gov.uk/def/bathing-water/',
                        'http://environment.data.gov.uk/def/bwq-cc-2012/',
                        'http://location.data.gov.uk/def/ef/SamplingPoint/',
                        'http://www.w3.org/2006/time',
                        'http://purl.org/linked-data/cube',
                        'http://www.w3.org/2004/02/skos/core',
                        'http://purl.org/dc/terms/',
                        'http://xmlns.com/foaf/0.1/',
                        'http://purl.org/linked-data/sdmx/2009/sdmx-attribute',
                        'http://www.w3.org/2003/01/geo/wgs84_pos',
                        'http://data.ordnancesurvey.co.uk/ontology/geometry/',
                    )
                ])
        ]
        for dataset in datasets:
            dataset.update(defaults)
            existing_dataset = model.Package.get(dataset['name'])
            action = 'create' if not existing_dataset else 'update'
            if existing_dataset:
                dataset['id'] = existing_dataset.id
            dataset = pt.get_action('dataset_%s' % action)(context, dataset)

        print 'Datasets: ', ' '.join([dataset['name'] for dataset in datasets])
Exemplo n.º 10
0
    def create_test_data(self):
        from ckan import plugins
        from ckan import model
        from ckanext.dgu.model.schema_codelist import Schema, Codelist
        pt = plugins.toolkit
        context = {'model': model, 'user': '******'}

        # Create schemas
        schemas = [dict(url='http://lga.org/toilet?v0.3', title='Toilet locations'),
                   dict(url='http://spend.com/25', title='25k Spend'),
                   dict(url='http://environment.data.gov.uk/def/bathing-water-quality/', title='Bathing water quality (ontology)'),
                   dict(url='http://environment.data.gov.uk/def/bathing-water/', title='Bathing water (ontology)'),
                   dict(url='http://environment.data.gov.uk/def/bwq-cc-2012/', title='Bathing water classifications'),
                   dict(url='http://location.data.gov.uk/def/ef/SamplingPoint/', title='Sampling point (environmental monitoring) ontology'),
                   dict(url='http://www.w3.org/2006/time', title='Time (OWL ontology)'),
                   dict(url='http://purl.org/linked-data/cube', title='Data cube (vocabulary)'),
                   dict(url='http://www.w3.org/2004/02/skos/core', title='Simple Knowledge Organization System (SKOS vocabulary)'),
                   dict(url='http://purl.org/dc/terms/', title='DCMI Metadata Terms (vocabulary)'),
                   dict(url='http://xmlns.com/foaf/0.1/', title='FOAF Vocabulary'),
                   dict(url='http://purl.org/linked-data/sdmx/2009/sdmx-attribute', title='Statistical Data and Metadata Exchange (SDMX)'),
                   dict(url='http://www.w3.org/2003/01/geo/wgs84_pos', title='WGS84 Geo Positioning vocabulary'),
                   dict(url='http://data.ordnancesurvey.co.uk/ontology/geometry/', title='Ordnance Survey Geometry (ontology)'),
                   ]
        for schema in schemas:
            existing_schema = Schema.by_title(schema['title'])
            if existing_schema:
                schema['id'] = existing_schema.id
                for k, v in schema.items():
                    setattr(existing_schema, k, v)
            else:
                model.Session.add(Schema(**schema))
            model.repo.commit_and_remove()

        codelists = [
            dict(url='http://environment.data.gov.uk/registry/def/water-quality/_sampling_point_types', title='Water sampling point types'),
            dict(url='http://environment.data.gov.uk/registry/def/water-quality/sampling_mechanisms', title='Water quality sampling mechanisms'),
            ]
        for codelist in codelists:
            existing_list = Codelist.by_title(codelist['title'])
            if existing_list:
                codelist['id'] = existing_list.id
                for k, v in codelist.items():
                    setattr(existing_list, k, v)
            else:
                model.Session.add(Codelist(**codelist))
            model.repo.commit_and_remove()

        # Create org
        org = dict(name='oxford', title='Oxford',
                   type='organization',
                   is_organization=True,
                   category='local-council')
        existing_org = model.Group.get(org['name'])
        action = 'create' if not existing_org else 'update'
        if existing_org:
            org['id'] = existing_org.id
        org = pt.get_action('organization_%s' % action)(context, org)

        # Create datasets
        defaults = dict(license_id='uk-ogl',
                        owner_org=org['id'],
                        notes='This is a test')
        datasets = [
            dict(name='oxford-toilets',
                 title='Oxford toilets',
                 codelist=[],
                 schema=[Schema.by_title('Toilet locations').id]),
            dict(name='bathing-waters',
                 title='Bathing waters',
                 codelist=[Codelist.by_title(title).id for title in
                           ('Water sampling point types',
                            'Water quality sampling mechanisms')],
                 schema=[Schema.by_url(url).id for url in
                         (
                            'http://environment.data.gov.uk/def/bathing-water-quality/',
                            'http://environment.data.gov.uk/def/bathing-water/',
                            'http://environment.data.gov.uk/def/bwq-cc-2012/',
                            'http://location.data.gov.uk/def/ef/SamplingPoint/',
                            'http://www.w3.org/2006/time',
                            'http://purl.org/linked-data/cube',
                            'http://www.w3.org/2004/02/skos/core',
                            'http://purl.org/dc/terms/',
                            'http://xmlns.com/foaf/0.1/',
                            'http://purl.org/linked-data/sdmx/2009/sdmx-attribute',
                            'http://www.w3.org/2003/01/geo/wgs84_pos',
                            'http://data.ordnancesurvey.co.uk/ontology/geometry/',
                         )]
                     )]
        for dataset in datasets:
            dataset.update(defaults)
            existing_dataset = model.Package.get(dataset['name'])
            action = 'create' if not existing_dataset else 'update'
            if existing_dataset:
                dataset['id'] = existing_dataset.id
            dataset = pt.get_action('dataset_%s' % action)(context, dataset)

        print 'Datasets: ', ' '.join([dataset['name'] for dataset in datasets])