示例#1
0
 def create(cls, term, label, lang):
     """Instantiate a term with its label/language and persist it."""
     instance = cls(term=term, label=label, lang=lang)
     Session.add(instance)
     Session.flush()
     return instance
示例#2
0
    def test_tokenizer(self):
        """Exercise License token lookups after loading the test license graph."""

        load_from_graph(path=self.licenses)
        Session.flush()
        # at least one token must have been generated from the loaded graph
        tokens = License.get_as_tokens()
        self.assertTrue(len(tokens.keys())>0)

        # exact token match: resolves without falling back to the default
        from_token, default = License.find_by_token('cc-by-sa')
        self.assertFalse(default)
        self.assertTrue(from_token)
        self.assertTrue('ccbysa' in from_token.uri.lower())

        from_token, default = License.find_by_token('cc-zero') #http://opendefinition.org/licenses/cc-zero/')
        self.assertFalse(default)
        self.assertTrue(from_token)

        self.assertTrue('PublicDomain' in from_token.license_type)

        # lookup by a (localized) license name rather than a token
        from_token, default = License.find_by_token('Creative Commons Attribuzione') #http://opendefinition.org/licenses/cc-zero/')
        self.assertFalse(default)
        self.assertTrue(from_token)

        self.assertTrue('Attribution' in from_token.license_type)

        # free-text license blob should still resolve (here to ODbL),
        # even though a fallback token 'other' is supplied
        odbl = """["Open Data Commons Open Database License / OSM (ODbL/OSM): You are free to copy, distribute, transmit and adapt our data, as long as you credit OpenStreetMap and its contributors\nIf you alter or build upon our data, you may distribute the result only under the same licence. (http://www.openstreetmap.org/copyright)"]"""

        from_token, default = License.find_by_token(odbl, 'other')
        self.assertFalse(default)
        self.assertTrue(from_token)
        self.assertTrue('odbl' in from_token.default_name.lower())
示例#3
0
def load_themes():
    """Load the EU themes SKOS vocabulary and its subtheme mappings into the db."""
    vocab_file_path = _get_path('data-theme-skos.rdf', 'vocabularies')

    # minimal stand-in for the CLI options object the themes loader expects
    class Opts(object):
        def __init__(self, filename, name, format):
            self.filename = filename
            self.url = None  #filename
            self.name = name
            self.format = format

    themes_loader.options = Opts(vocab_file_path, 'eu_themes', None)
    themes_loader.load()

    # sanity-check that localized tags and the vocabulary row were created
    tag_localized = interfaces.get_localized_tag_name('ECON')
    Session.flush()
    assert tag_localized
    q = Session.query(Vocabulary).filter_by(name='eu_themes')
    vocab = q.first()
    assert vocab

    # reload the theme -> EUROVOC subtheme mapping from scratch
    map_f = _get_path(MAPPING_FILE)
    voc_f = _get_path(EUROVOC_FILE)
    clear_subthemes()
    load_subthemes(map_f, voc_f)
    assert Subtheme.q().first()
 def setUp(self):
     """Load the example licenses RDF fixture before each test."""
     examples_dir = os.path.join(os.path.dirname(__file__),
                                 '..', '..', '..', 'examples')
     licenses = os.path.join(examples_dir, 'licenses.rdf')
     load_from_graph(path=licenses)
     Session.flush()
    def test_ckan_duplicated_name(self):
        """Importing two datasets with the same name mangles the second name."""
        dataset0 = {
            'owner_org': self.org['id'],
            'holder_name': 'test holder',
            'holder_identifier': 'abcdef',
            'notes': 'some notes',
            'modified': '2000-01-01',
            'theme': 'AGRI',
            'frequency': 'UNKNOWN',
            'publisher_name': 'publisher',
            'identifier': 'aasdfa',
            'publisher_identifier': 'publisher',
            'resources': [],
            'extras': [],
        }

        dataset1 = {
            'owner_org': self.org['id'],
            'title': 'duplicated title',
            'name': 'duplicated-title',
            'id': 'dummyid'
        }
        dataset1.update(dataset0)
        data = json.dumps(dataset1)

        # first import: the requested name is free, so it is kept as-is
        harvest_dict = self._create_harvest_obj('http://mock/source/', name='dupname1', owner_org=self.org['id'])
        harvest_obj = HarvestObject.get(harvest_dict['id'])
        harvest_obj.content = data
        h = DCATRDFHarvester()
        import_successful = h.import_stage(harvest_obj)
        self.assertTrue(import_successful, harvest_obj.errors)
        Session.flush()
        dataset1['_id'] = harvest_obj.package_id

        # second dataset: same title/name but a different identifier
        dataset2 = {'title': 'duplicated title',
                    'name': 'duplicated-title',
                    'id': 'dummyid2'}

        dataset2.update(dataset0)
        dataset2['identifier'] = 'otherid'
        data = json.dumps(dataset2)

        harvest_dict = self._create_harvest_obj('http://mock/source/', name='dupname2', owner_org=self.org['id'])
        harvest_obj = HarvestObject.get(harvest_dict['id'])
        harvest_obj.content = data
        h = DCATRDFHarvester()
        import_successful = h.import_stage(harvest_obj)
        self.assertTrue(import_successful, harvest_obj.errors)
        Session.flush()
        dataset2['_id'] = harvest_obj.package_id

        # duplicated names are mangled, one should have numeric suffix
        pkg_dict = helpers.call_action('package_show', context={}, name_or_id=dataset1['_id'])
        self.assertEqual(pkg_dict['title'], dataset1['title'])
        self.assertEqual(pkg_dict['name'], 'duplicated-title')

        pkg_dict = helpers.call_action('package_show', context={}, name_or_id=dataset2['_id'])
        self.assertEqual(pkg_dict['title'], dataset2['title'])
        self.assertEqual(pkg_dict['name'], 'duplicated-title1')
示例#6
0
    def clear(cls):
        """Delete every localized license name and every row of this model.

        The session revision (when the Session exposes one) is saved and
        restored around the flush.
        """
        Session.query(LocalizedLicenseName).delete()
        Session.query(cls).delete()

        saved_revision = getattr(Session, 'revision', None)
        Session.flush()
        Session.revision = saved_revision
 def _create_harvest_obj(self, mock_url, **kwargs):
     """Create a harvest source, a job for it, and a harvest object.

     Returns the harvest object dict produced by harvest_object_create.
     """
     context = {'session': Session, 'model': model}
     source = self._create_harvest_source(context, mock_url, **kwargs)
     Session.flush()
     job = self._create_harvest_job(context, source['id'])
     Session.flush()
     return helpers.call_action('harvest_object_create',
                                context=context,
                                job_id=job['id'],
                                source_id=source['id'])
示例#8
0
    def clear(self):
        """Delete all labels and terms belonging to this vocabulary.

        Labels are removed first (they reference the terms) via a subquery
        over this vocabulary's term ids; synchronize_session is disabled
        because the session is flushed immediately afterwards.
        """
        term_ids = Session.query(VocabularyTerm.id).filter(VocabularyTerm.vocabulary_id==self.id)
        Session.query(VocabularyLabel)\
                   .filter(VocabularyLabel.term_id.in_(term_ids.subquery())).delete(synchronize_session=False)
        term_ids.delete()
        Session.flush()
示例#9
0
 def set_parent(self, parent_uri):
     """
     Set parent for given license

     :param parent_uri: URI of the parent License; must resolve via
         License.get, otherwise ValueError is raised.
     """
     parent = License.get(parent_uri)
     if not parent:
         raise ValueError("No parent %s object" % parent_uri)
     self.parent_id = parent.id
     Session.add(self)
     # preserve the session revision across the flush, as the other
     # revision-aware helpers in this module do (the original captured
     # `rev` but never restored it)
     try:
         rev = Session.revision
     except AttributeError:
         rev = None
     Session.flush()
     Session.revision = rev
示例#10
0
    def create(cls, vocab, name, labels=None, parent=None, properties=None):
        """Create and persist a vocabulary term, optionally with labels and a parent."""
        if not isinstance(vocab, Vocabulary):
            vocab = Vocabulary.get(vocab)

        depth = parent.depth + 1 if parent else 0
        term = cls(vocabulary=vocab,
                   name=name,
                   depth=depth,
                   parent=parent)
        term.properties = properties or {}
        if labels:
            term.set_labels(labels)

        term.update_path()
        Session.add(term)
        Session.flush()
        return term
示例#11
0
    def test_licenses(self):
        """Loading the license graph populates License and its localized names."""

        load_from_graph(path=self.licenses)
        Session.flush()

        # one License row per skos:Concept in the source graph
        all_licenses = License.q()
        count = all_licenses.count()
        self.assertTrue(count> 0)
        self.assertTrue(count == len(list(self.g.subjects(None, SKOS.Concept))))
        
        all_localized = LocalizedLicenseName.q()
        self.assertTrue(all_localized.count() > 0)

        for_select = License.for_select('it')
        
        # check license type
        self.assertTrue(all([s[0] for s in for_select]))
    def updateValidationJobStatus(self,
                                  session=None,
                                  resource_id=None,
                                  status=None,
                                  report=None,
                                  error=None,
                                  validationRecord=None):
        # type: (Session, str, str, object, object, object) -> model.Validation
        """
        Update (and commit) the status of a validation job record.

        If a terminal status is set, `finished` is stamped with the
        current UTC time.

        :param session: Session used to look the record up
        :param resource_id: id of the resource being validated
        :param status: one of StatusTypes
        :param report: validation report to attach, if any
        :param error: error information to attach, if any
        :param validationRecord: optional pre-fetched record; looked up
            from (session, resource_id) when not supplied
        :raises ValidationJobDoesNotExist: when no record can be found
        :raises ValidationJobAlreadyRunning: when the job is already
            `running` and was created less than an hour ago
        :return: the updated model.Validation record
        """
        log.debug("updateValidationJobStatus: %s status: %s", resource_id,
                  status)
        if validationRecord is None:
            validationRecord = self.getValidationJob(session, resource_id)

        if validationRecord is None:
            log.error("record not found to update statues: %s", resource_id)
            raise ValidationJobDoesNotExist()

        # reject a re-run if the job went to `running` within the last hour
        if status == StatusTypes.running and validationRecord.status == status:
            if self.getHoursSince(validationRecord.created) < 1:
                raise ValidationJobAlreadyRunning()

        validationRecord.status = status
        validationRecord.report = report
        validationRecord.error = error
        # terminal states record their completion time
        if status in (StatusTypes.success, StatusTypes.failure,
                      StatusTypes.error):
            validationRecord.finished = datetime.datetime.utcnow()

        Session.add(validationRecord)
        Session.commit()
        # Flush so other transactions are not waiting
        Session.flush()
        return validationRecord
示例#13
0
def map_nonconformant_groups(harvest_object):
    """
    Adds themes to fetched data

    Maps the harvested object's group names (and display names) to themes
    via the configured mapping data, storing the result both as a 'theme'
    extra and as a top-level 'theme' key. The session revision (if any)
    is preserved across the flush.
    """
    themes_data = _load_mapping_data()
    if not themes_data:
        return

    data = json.loads(harvest_object.content)
    _groups = data.get('groups')
    if not _groups:
        return

    groups = [g['name'] for g in _groups]
    groups.extend([g['display_name'] for g in _groups if 'display_name' in g])

    new_themes = _get_new_themes(groups, themes_data, add_existing=False)
    if not new_themes:
        return

    # ensure themes are upper-case, otherwise will be discarded
    # by validators
    tdata = {'key': 'theme', 'value': _encode_list(new_themes).upper()}
    existing = False
    extra = data.get('extras') or []
    # update the 'theme' extra in place when present, otherwise append it
    for eitem in extra:
        if eitem['key'] == 'theme':
            existing = True
            eitem['value'] = tdata['value']
            break

    if not existing:
        extra.append(tdata)
    data['extras'] = extra
    data['theme'] = tdata['value']

    harvest_object.content = json.dumps(data)
    Session.add(harvest_object)
    # keep the current revision (if the Session exposes one) across the flush
    try:
        rev = Session.revision
    except AttributeError:
        rev = None
    Session.flush()
    Session.revision = rev
示例#14
0
    def test_ckan_harvester_license(self):
        """Invalid resource license_type falls back to the default license URI."""

        dataset = {
            'title':
            'some title',
            'id':
            'sometitle',
            'resources': [{
                'id': 'resource/1111',
                'url': 'http://resource/1111',
                'license_type': 'invalid',
            }, {
                'id':
                'resource/2222',
                'url':
                'http://resource/2222',
                'license_type':
                'https://w3id.org/italia/controlled-vocabulary/licences/A311_GFDL13'
            }]
        }

        data = json.dumps(dataset)
        harvest_dict = self._create_harvest_obj('http://mock/source/',
                                                name='testpkg')
        harvest_obj = HarvestObject.get(harvest_dict['id'])
        harvest_obj.content = data
        h = CKANMappingHarvester()
        h.import_stage(harvest_obj)
        Session.flush()

        pkg_dict = helpers.call_action('package_show',
                                       context={},
                                       name_or_id='sometitle')
        self.assertTrue(len(pkg_dict['resources']) == 2)

        # the invalid license is replaced with the default license URI,
        # the valid vocabulary URI is kept untouched
        resources = pkg_dict['resources']
        r = dataset['resources']
        for res in resources:
            if res['id'] == r[0]['id']:
                self.assertEqual(res['license_type'],
                                 License.get(License.DEFAULT_LICENSE).uri)
            else:
                self.assertEqual(res['license_type'], r[1]['license_type'])
示例#15
0
    def from_data(cls,
                  license_type,
                  version,
                  uri,
                  path,
                  document_uri,
                  rank_order,
                  names,
                  default_lang=None,
                  parent=None):
        """
        Build and persist a License from raw attribute values.

        `names` maps language codes to license names; the entry for
        `default_lang` (falling back to 'it') becomes default_name.
        `parent` may be a parent URI: it is swapped for the matching
        License row's id when one exists. The session revision (if any)
        is preserved across both flushes.
        """
        if default_lang is None:
            default_lang = 'it'
        default_name = names[default_lang]

        if parent is not None:
            parent_inst = Session.query(License).filter_by(
                uri=str(parent)).first()
            if parent_inst:
                parent = parent_inst.id

        inst = cls(license_type=license_type,
                   version=version,
                   uri=uri,
                   path=path,
                   document_uri=document_uri,
                   rank_order=rank_order,
                   parent_id=parent,
                   default_name=default_name)
        Session.add(inst)
        try:
            rev = Session.revision
        except AttributeError:
            rev = None
        Session.flush()
        Session.revision = rev
        # localized names require the instance id, hence the second flush
        inst.set_names(names)
        Session.flush()
        Session.revision = rev
        return inst
    def setUp(self):
        """Load licenses and ensure the 'dummy' user and organization exist."""
        licenses = get_voc_file(LICENSES_FILE)
        load_licenses(load_graph(licenses))
        Session.flush()

        user = User.get('dummy')

        if not user:
            user = call_action('user_create',
                               name='dummy',
                               password='******',
                               email='*****@*****.**')
            user_name = user['name']
        else:
            user_name = user.name
        # reuse an existing org when present; call_action returns a dict,
        # so mirror that shape from the Group model instance
        org = Group.by_name('dummy')
        if org:
            self.org = org.__dict__
        else:
            self.org = call_action('organization_create',
                              context={'user': user_name},
                              name='dummy',
                              identifier='aaaaaa')
示例#17
0
def map_nonconformant_groups(harvest_object):
    """
    Adds themes to fetched data

    Group names (and display names) are mapped to themes and stored both
    as the aggregate themes extra/field and as the plain 'theme'
    extra/field (a JSON list of theme URIs).
    """
    themes_data = _load_mapping_data()
    if not themes_data:
        return

    data = json.loads(harvest_object.content)
    _groups = data.get('groups')
    if not _groups:
        return

    groups = [g['name'] for g in _groups]
    groups.extend([g['display_name'] for g in _groups if 'display_name' in g])

    new_themes = _get_new_themes(groups, themes_data, add_existing=False)
    if not new_themes:
        return

    # ensure themes are upper-case, otherwise will be discarded by validators

    extra = data.get('extras') or []

    aggr = themes_to_aggr_json(new_themes)
    _set_extra(extra, FIELD_THEMES_AGGREGATE, aggr)
    data[FIELD_THEMES_AGGREGATE] = aggr

    theme_list = json.dumps([theme_name_to_uri(name) for name in new_themes])
    _set_extra(extra, 'theme', theme_list)
    data['theme'] = theme_list

    data['extras'] = extra

    harvest_object.content = json.dumps(data)
    Session.add(harvest_object)
    Session.flush()
示例#18
0
    def add_for_theme(cls, g, theme_ref, subtheme_ref, parent=None):
        """
        Create (or update) a Subtheme from `subtheme_ref` in graph `g`,
        attaching it to the theme identified by `theme_ref`.

        Returns the existing instance (with the theme tag appended) when
        the subtheme URI is already known, None when the graph carries no
        skos:prefLabel for it, and the freshly created instance otherwise.
        Children reachable via skos:hasTopConcept are added recursively.
        """
        theme = cls.normalize_theme(theme_ref)
        existing = cls.q().filter_by(uri=str(subtheme_ref)).first()
        theme_tag = ThemeToSubtheme.get_tag(theme)

        revision = getattr(Session, 'revision', None) or repo.new_revision()

        # several themes may refer to this subtheme, so we'll just return
        # existing instance
        if existing:
            if theme_tag not in existing.themes:
                existing.themes.append(theme_tag)
            Session.flush()
            Session.revision = revision
            log.error("Subtheme %s already exists. Skipping", subtheme_ref)
            return existing

        labels = {}
        for l in g.objects(subtheme_ref, SKOS.prefLabel):
            labels[l.language] = unicode(l)
        if not labels:
            log.error("NO labels for %s. Skipping", subtheme_ref)
            return
        version = g.value(subtheme_ref, OWL.versionInfo) or ''
        identifier = g.value(subtheme_ref, DCT.identifier) or ''
        default_label = labels[DEFAULT_LANG]
        inst = cls(version=str(version),
                   identifier=str(identifier),
                   uri=str(subtheme_ref),
                   default_label=default_label,
                   parent_id=parent.id if parent else None,
                   depth=parent.depth + 1 if parent else 0)
        inst.update_path()
        Session.add(inst)
        Session.flush()
        Session.revision = revision

        # top-level subthemes act as their own parent
        if parent is None:
            inst.parent_id = inst.id

        theme_m = ThemeToSubtheme(tag_id=theme_tag.id, subtheme_id=inst.id)
        Session.add(theme_m)

        # one localized label row per language found in the graph
        for lang, label in labels.items():
            l = SubthemeLabel(subtheme_id=inst.id, lang=lang, label=label)
            Session.add(l)
        Session.flush()
        Session.revision = revision
        # handle children

        for child in g.objects(subtheme_ref, SKOS.hasTopConcept):
            cls.add_for_theme(g, theme_ref, child, inst)

        return inst
示例#19
0
 def create(cls, name, has_relations=False):
     """Create and persist a new instance with the given name."""
     obj = cls(name=name, has_relations=has_relations)
     Session.add(obj)
     Session.flush()
     return obj
示例#20
0
def populate_theme_groups(instance, clean_existing=False):
    """
    For given instance, it finds groups from mapping corresponding to
    Dataset's themes, and will assign dataset to those groups.

    Existing groups will be removed, if clean_existing is set to True.

    This utilizes `ckanext.dcatapit.theme_group_mapping.add_new_groups`
    configuration option. If it's set to true, and mapped group doesn't exist,
    new group will be created.
    """
    add_new = toolkit.asbool(
        config.get(DCATAPIT_THEME_TO_MAPPING_ADD_NEW_GROUPS))
    themes = []
    # collect theme names from the aggregate extra first; the plain
    # 'theme' extra is a fallback (the aggregate branch breaks the loop)
    for ex in (instance.get('extras') or []):
        if ex['key'] == FIELD_THEMES_AGGREGATE:
            _t = ex['value']
            if isinstance(_t, list):
                themes.extend(_t)
            else:
                try:
                    tval = json.loads(_t)
                except Exception:
                    # legacy '{A,B}' encoded list
                    log.warning(f'Trying old themes format for {_t}')
                    tval = [{
                        'theme': t,
                        'subthemes': []
                    } for t in _t.strip('{}').split(',')]

                for tv in tval:
                    themes.append(tv['theme'])

            break  # we don't need any other info - if there are 'themes' is ok to bypass them

        elif ex['key'] == 'theme':
            _t = ex['value']
            if isinstance(_t, list):
                themes.extend(_t)
            else:
                try:
                    tval = json.loads(_t)
                except Exception:
                    # legacy '{A,B}' encoded list
                    log.warning(f'Trying old themes format for {_t}')
                    tval = _t.strip('{}').split(',')

                themes.extend(tval)
            # dont break the for loop: if aggregates are there, they get precedence

    if not themes:
        log.debug('no theme from %s', instance)
        return instance
    theme_map = get_theme_to_groups()

    if not theme_map:
        log.warning('Theme to group map is empty')
        return instance
    if not isinstance(themes, list):
        themes = [themes]
    # union of all groups mapped from any of the dataset's themes
    all_groups = set()
    for theme in themes:
        _groups = theme_map.get(theme)
        if not _groups:
            continue
        all_groups = all_groups.union(set(_groups))
    if clean_existing:
        _clean_groups(instance)
    groups = []
    for gname in all_groups:
        gname = gname.strip()
        if not gname:
            continue
        group = Group.get(gname) or _get_group_from_session(gname)
        if add_new and group is None:
            group = Group(name=gname)
            Session.add(group)
        if group:
            groups.append(group)

    if Session.new:
        # flush to db, refresh with ids
        Session.flush()
        groups = [(Group.get(g.name) if g.id is None else g) for g in groups]
    _add_groups(instance['id'], set(groups))

    Session.flush()
    return instance
示例#21
0
 def clear_labels(self):
     """Remove every label row attached to this term."""
     labels_q = Session.query(VocabularyLabel).filter(VocabularyLabel.term_id == self.id)
     labels_q.delete()
     Session.flush()
示例#22
0
 def update(self, labels=labels, parent=None, properties=None):
     """Replace this term's labels and properties, then persist the change."""
     # NOTE(review): the default `labels=labels` binds whatever `labels`
     # name is in scope at definition time - verify this is not a typo
     # for `labels=None`. `parent` is accepted but not used here.
     self.clear_labels()
     self.set_labels(labels)
     self.properties = properties or {}
     Session.add(self)
     Session.flush()
示例#23
0
def populate_theme_groups(instance, clean_existing=False):
    """
    For given instance, it finds groups from mapping corresponding to
    Dataset's themes, and will assign dataset to those groups.

    Existing groups will be removed, if clean_existing is set to True.

    This utilizes `ckanext.dcatapit.theme_group_mapping.add_new_groups`
    configuration option. If it's set to true, and mapped group doesn't exist,
    new group will be created.
    """
    add_new = toolkit.asbool(
        config.get(DCATAPIT_THEME_TO_MAPPING_ADD_NEW_GROUPS))
    themes = []
    # collect theme names from the 'theme' extra; the value may be a
    # list, a JSON string, or the legacy encoded-list format
    for ex in (instance.get('extras') or []):
        if ex['key'] == 'theme':
            _t = ex['value']
            if isinstance(_t, list):
                themes.extend(_t)
            else:
                try:
                    tval = json.loads(_t)
                except Exception:
                    tval = [{
                        'theme': t,
                        'subthemes': []
                    } for t in _decode_list(_t)]
                for tv in tval:
                    themes.append(tv['theme'])
    if not themes:
        log.debug("no theme from %s", instance)
        return instance
    theme_map = get_theme_to_groups()
    if not theme_map:
        log.warning("Theme to group map is empty")
        return instance
    if not isinstance(themes, list):
        themes = [themes]
    # union of all groups mapped from any of the dataset's themes
    all_groups = set()
    for theme in themes:
        _groups = theme_map.get(theme)
        if not _groups:
            continue
        all_groups = all_groups.union(set(_groups))
    if clean_existing:
        _clean_groups(instance)
    groups = []
    for gname in all_groups:
        gname = gname.strip()
        if not gname:
            continue
        group = Group.get(gname) or _get_group_from_session(gname)
        if add_new and group is None:
            group = Group(name=gname)
            Session.add(group)
        if group:
            groups.append(group)

    if Session.new:
        # flush to db, refresh with ids
        rev = Session.revision
        Session.flush()
        Session.revision = rev
        groups = [(Group.get(g.name) if g.id is None else g) for g in groups]
    _add_groups(instance['id'], set(groups))

    # preserve revision, since it's not a commit yet
    rev = Session.revision
    Session.flush()
    Session.revision = rev

    return instance
    def test_graph_from_dataset(self):

        conforms_to_in = [{'identifier': 'CONF1',
                                       'uri': 'conf01',
                                 'title': {'en': 'title', 'it': 'title'},
                                 'referenceDocumentation': ['http://abc.efg/'],},
                                {'identifier': 'CONF2',
                                 'title': {'en': 'title', 'it': 'title'},
                                 'description': {'en': 'descen', 'it': 'descit'},
                                 'referenceDocumentation': ['http://abc.efg/'],},
                                 ]

        alternate_identifiers = [{'identifier': 'aaaabc',
                                 'agent': {'agent_identifier': 'agent01',
                                           'agent_name': {'en': 'Agent en 01', 'it': 'Agent it 01'}},
                                 },
                                 {'identifier': 'other identifier', 'agent': {}}]
        creators = [{'creator_name': {'en': 'abc'}, 'creator_identifier': "ABC"},
                    {'creator_name': {'en': 'cde'}, 'creator_identifier': "CDE"},
                    ]

        temporal_coverage = [{'temporal_start': '2001-01-01', 'temporal_end': '2001-02-01 10:11:12'},
                             {'temporal_start': '2001-01-01', 'temporal_end': '2001-02-01 11:12:13'},
                            ]

        subthemes = [{'theme': 'AGRI', 'subthemes': ['http://eurovoc.europa.eu/100253',
                                                     'http://eurovoc.europa.eu/100258']},
                     {'theme': 'ENVI', 'subthemes': []}]

        dataset = {
            'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
            'name': 'test-dataset',
            'title': 'Dataset di test DCAT_AP-IT',
            'notes': 'dcatapit dataset di test',
            'metadata_created': '2015-06-26T15:21:09.034694',
            'metadata_modified': '2015-06-26T15:21:09.075774',
            'tags': [{'name': 'Tag 1'}, {'name': 'Tag 2'}],
            'issued':'2016-11-29',
            'modified':'2016-11-29',
            'identifier':'ISBN',
            'temporal_start':'2016-11-01',
            'temporal_end':'2016-11-30',
            'frequency':'UPDATE_CONT',
            'publisher_name':'bolzano',
            'publisher_identifier':'234234234',
            'creator_name':'test',
            'creator_identifier':'412946129',
            'holder_name':'bolzano',
            'holder_identifier':'234234234',
            'alternate_identifier':json.dumps(alternate_identifiers),
            'temporal_coverage': json.dumps(temporal_coverage),
            #'theme':'ECON',
            'geographical_geonames_url':'http://www.geonames.org/3181913',
            'language':'{DEU,ENG,ITA}',
            'is_version_of':'http://dcat.geo-solutions.it/dataset/energia-da-fonti-rinnovabili2',
            'conforms_to':json.dumps(conforms_to_in),
            'creator': json.dumps(creators),
            'theme': json.dumps(subthemes),


        }
        
        pkg_id = dataset['id']
        
        pub_names = {'it': 'IT publisher',
                     'es': 'EN publisher'}
        holder_names = {'it': 'IT holder name',
                        'es': 'EN holder name'}

        multilang_fields = [('publisher_name', 'package', k, v) for k, v in pub_names.items()] +\
                           [('holder_name', 'package', k, v) for k, v in holder_names.items()]
        
        pkg = helpers.call_action('package_create', {'defer_commit': True}, **dataset)
        rev = getattr(Session,  'revision', repo.new_revision())
        Session.flush()
        Session.revision = rev
        pkg_id = pkg['id']

        for field_name, field_type, lang, text in multilang_fields:
            interfaces.upsert_package_multilang(pkg_id, field_name, field_type, lang, text)

        loc_dict = interfaces.get_for_package(pkg_id)
        #assert loc_dict['publisher_name'] == pub_names
        #assert loc_dict['holder_name'] == holder_names


        # temporary bug for comaptibility with interfaces.get_language(),
        # which will return lang[0]
        pub_names.update({DEFAULT_LANG: dataset['publisher_name']})
        # pub_names.update({DEFAULT_LANG[0]: dataset['publisher_name']})
        holder_names.update({DEFAULT_LANG: dataset['holder_name']})
        # holder_names.update({DEFAULT_LANG[0]: dataset['holder_name']})
        
        s = RDFSerializer()
        g = s.g

        dataset_ref = s.graph_from_dataset(dataset)

        eq_(unicode(dataset_ref), utils.dataset_uri(dataset))

        # Basic fields
        assert self._triple(g, dataset_ref, RDF.type, DCATAPIT.Dataset)
        assert self._triple(g, dataset_ref, DCT.title, dataset['title'])
        assert self._triple(g, dataset_ref, DCT.description, dataset['notes'])

        assert self._triple(g, dataset_ref, DCT.identifier, dataset['identifier'])

        # Tags
        eq_(len([t for t in g.triples((dataset_ref, DCAT.keyword, None))]), 2)
        for tag in dataset['tags']:
            assert self._triple(g, dataset_ref, DCAT.keyword, tag['name'])
        
        # conformsTo
        conforms_to = list(g.triples((None, DCT.conformsTo, None)))
        assert conforms_to

        conforms_to_dict = dict((d['identifier'], d) for d in conforms_to_in)
        for conf in conforms_to:
            conf_id = conf[-1]

            identifier = g.value(conf_id, DCT.identifier)
            titles = list(g.objects(conf_id, DCT.title))
            descs = list(g.objects(conf_id, DCT.description))
            references = list(g.objects(conf_id, DCATAPIT.referenceDocumentation))
            
            check = conforms_to_dict.get(str(identifier))
            
            assert isinstance(check, dict)

            if check.get('uri'):
                assert check['uri'] == str(conf_id)
            assert len(titles), "missing titles"
            
            assert (len(descs)> 0) == bool(check.get('description')), "missing descriptions"

            for title in titles:
                tlang = title.language
                tval = str(title)
                assert tval == check['title'][tlang], (tlang, tval, check['title'])

            for desc in descs:
                tlang = desc.language
                tval = str(desc)
                assert tval == check['description'][tlang], (tlang, str(tval), check['description'])
            
            ref_docs = check.get('referenceDocumentation')
            assert len(references) == len(ref_docs), "missing reference documentation"
            
            for dref in references:
                assert str(dref) in ref_docs, "{} not in {}".format(dref, ref_docs)
                                                                
            for ref in ref_docs:
                assert URIRef(ref) in references

        # alternate identifiers
        alt_ids = [a[-1] for a in g.triples((None, ADMS.identifier, None))]
        alt_ids_dict = dict((a['identifier'], a) for a in alternate_identifiers)

        for alt_id in alt_ids:
            identifier = g.value(alt_id, SKOS.notation)
            check = alt_ids_dict[str(identifier)]
            assert str(identifier) == check['identifier']
            if check.get('agent'):
                agent_ref = g.value(alt_id, DCT.creator)
                assert agent_ref is not None

                agent_identifier = g.value(agent_ref, DCT.identifier)

                agent_name = dict((v.language, str(v)) for v in g.objects(agent_ref, FOAF.name))
                
                assert set(agent_name.items()) == set(check['agent']['agent_name'].items()),\
                    "expected {}, got {} for {}".format(check['agent']['agent_name'], agent_name, agent_ref)

                assert str(agent_identifier) == check['agent']['agent_identifier'],\
                    "expected {}, got {}".format(check['agent']['agent_identifier'], agent_identifier)
        # creators
        creators.append({'creator_name':{'en': 'test'},
                         'creator_identifier':'412946129'})
        creators_in = list(g.objects(dataset_ref, DCT.creator))
        assert len(creators) == len(creators_in)

        for cref in creators_in:
            cnames = dict((str(c.language) if c.language else DEFAULT_LANG, str(c)) for c in g.objects(cref, FOAF.name))
            c_identifier = g.value(cref, DCT.identifier)
            c_dict = {'creator_name': cnames,
                      'creator_identifier': str(c_identifier)}
            assert c_dict in creators, "no {} in {}".format(c_dict, creators)

        # temporal coverage
        temporal_coverage.append({'temporal_start': dataset['temporal_start'],
                                  'temporal_end': dataset['temporal_end']})
        temp_exts = list(g.triples((dataset_ref, DCT.temporal, None)))
        assert len(temp_exts) == len(temporal_coverage)
        
        # normalize values
        for item in temporal_coverage:
            for k, v in item.items():
                item[k] = pdate(v)

        temp_ext = []
        for interval_t in temp_exts:
            interval = interval_t[-1]
            start = g.value(interval, SCHEMA.startDate)
            end = g.value(interval, SCHEMA.endDate)
            assert start is not None
            assert end is not None
            temp_ext.append({'temporal_start': pdate(str(start)),
                             'temporal_end': pdate(str(end))})

        set1 = set([tuple(d.items()) for d in temp_ext])
        set2 = set([tuple(d.items()) for d in temporal_coverage])
        assert set1 == set2, "Got different temporal coverage sets: \n{}\n vs\n {}".format(set1, set2)

        for pub_ref in g.objects(dataset_ref, DCT.publisher):
            _pub_names = list(g.objects(pub_ref, FOAF.name))

            assert len(_pub_names) 

            for pub_name in _pub_names:
                if pub_name.language:
                    assert str(pub_name.language) in pub_names, "no {} in {}".format(pub_name.language, pub_names)
                    assert pub_names[str(pub_name.language)] == str(pub_name), "{} vs {}".format(pub_name, pub_names)

        for holder_ref in g.objects(dataset_ref, DCT.rightsHolder):
            _holder_names = list(g.objects(holder_ref, FOAF.name))

            assert len(_holder_names) 

            for holder_name in _holder_names:
                if holder_name.language:
                    assert str(holder_name.language) in holder_names, "no {} in {}".format(holder_name.language, holder_names)
                    assert holder_names[str(holder_name.language)] == str(holder_name), "{} vs {}".format(holder_name, holder_names)