def _do_test(self, input, output, urn_fallback=False):
    """Run ``IRILink`` on ``input`` and compare the outcome with ``output``.

    If ``output`` is an exception instance, executing the link must raise an
    exception of the same type whose ``args`` equal ``output.args``.
    Otherwise only the keys present in ``output`` are compared against the
    mapping returned by the link.

    NOTE(review): the exception branch builds ``IRILink()`` without
    ``urn_fallback``; only the success branch forwards it -- confirm that
    this is intended.
    """
    if not isinstance(output, Exception):
        result = IRILink(urn_fallback=urn_fallback).execute(input)
        # Ignore any keys the expectation does not mention.
        relevant = {key: value for key, value in result.items() if key in output}
        assert relevant == output
        return

    with pytest.raises(type(output)) as excinfo:
        IRILink().execute(input)
    assert excinfo.value.args == output.args
def regulate_node(self, node):
    """Normalize the IRI held by ``node`` and delete nodes that are rejected.

    The value under ``self.iri_field`` is run through ``IRILink``. The
    resulting authority and scheme are written to ``self.authority_field``
    and ``self.scheme_field``, and the IRI itself is replaced when the
    normalized form differs. The node is deleted when its scheme is in
    ``self.blocked_schemes``, when its authority is in
    ``self.blocked_authorities``, or when ``InvalidIRI`` is raised.
    """
    original_iri = node[self.iri_field]
    try:
        parsed = IRILink(urn_fallback=self.urn_fallback).execute(original_iri)
        node[self.authority_field] = parsed['authority']
        node[self.scheme_field] = parsed['scheme']

        normalized_iri = parsed['IRI']
        if original_iri != normalized_iri:
            node[self.iri_field] = normalized_iri
            self.info(
                'Normalized IRI "{}" into "{}"'.format(original_iri, normalized_iri),
                node.id,
            )

        # The scheme check takes precedence; the authority is only
        # consulted when the scheme is not blocked.
        discard_reason = None
        if self.blocked_schemes and parsed['scheme'] in self.blocked_schemes:
            discard_reason = 'Discarding identifier based on invalid scheme "{}"'.format(
                parsed['scheme'])
        elif self.blocked_authorities and parsed['authority'] in self.blocked_authorities:
            discard_reason = 'Discarding identifier based on invalid authority "{}"'.format(
                parsed['authority'])

        if discard_reason is not None:
            self.info(discard_reason, node.id)
            node.delete()
    except InvalidIRI:
        self.info(
            'Discarding identifier based on unrecognized IRI "{}"'.format(original_iri),
            node.id,
        )
        node.delete()
def normalize(self, node, graph):
    """Normalize ``node.attrs['uri']`` or remove ``node`` from ``graph``.

    The node is removed (with a warning) when ``IRILink`` raises
    ``InvalidIRI``, when the parsed authority is ``issn`` or ``orcid.org``,
    or when the parsed scheme is ``mailto``. Otherwise ``node.attrs`` is
    replaced by a dict holding the normalized ``uri``, ``host`` and
    ``scheme``.
    """
    uri = node.attrs['uri']
    try:
        parsed = IRILink().execute(uri)
    except InvalidIRI as error:
        logger.warning('Discarding invalid identifier %s with error %s', uri, error)
        graph.remove(node)
        return

    # These authorities/schemes are not accepted as identifiers for works.
    rejected_authority = parsed['authority'] in {'issn', 'orcid.org'}
    if rejected_authority or parsed['scheme'] in {'mailto'}:
        logger.warning(
            'Discarding %s %s as an invalid identifier for works',
            parsed['authority'], parsed['IRI'])
        graph.remove(node)
        return

    if uri != parsed['IRI']:
        logger.debug('Normalized %s to %s', uri, parsed['IRI'])

    # Written unconditionally so host and scheme are populated even when the
    # URI was already in normalized form.
    node.attrs = {
        'uri': parsed['IRI'],
        'host': parsed['authority'],
        'scheme': parsed['scheme'],
    }
def normalize(self, node, graph):
    """Normalize ``node.attrs['uri']`` or remove ``node`` from ``graph``.

    When ``IRILink`` raises ``InvalidIRI`` the node is removed and a warning
    is logged. Otherwise ``node.attrs`` is replaced by a dict holding the
    normalized ``uri``, ``host`` and ``scheme``.
    """
    uri = node.attrs['uri']
    try:
        parsed = IRILink().execute(uri)
    except InvalidIRI as error:
        logger.warning('Discarding invalid identifier %s with error %s', uri, error)
        graph.remove(node)
        return

    if uri != parsed['IRI']:
        logger.debug('Normalized %s to %s', uri, parsed['IRI'])

    # Written unconditionally so host and scheme are populated even when the
    # URI was already in normalized form.
    node.attrs = {
        'uri': parsed['IRI'],
        'host': parsed['authority'],
        'scheme': parsed['scheme'],
    }
def add_source_identifier(self, source_id, jsonld, root_ref):
    """Attach ``source_id`` to the root node of ``jsonld`` as a workidentifier.

    ``source_id`` is stringified and normalized through
    ``IRILink(urn_fallback=True)``. Nothing is changed if the graph already
    contains a workidentifier node with the same URI. Otherwise a new
    identifier node is appended to ``jsonld['@graph']`` and a reference to
    it is added to the root node's ``identifiers`` list.

    Raises ``StopIteration`` if no graph node matches the ``@id`` and
    ``@type`` of ``root_ref``.
    """
    from share.transform.chain.links import IRILink

    uri = IRILink(urn_fallback=True).execute(str(source_id))['IRI']
    graph = jsonld['@graph']

    # Bail out if an equivalent identifier is already present.
    for existing in graph:
        if existing['@type'].lower() == 'workidentifier' and existing['uri'] == uri:
            return

    identifier_ref = {'@id': '_:' + uuid.uuid4().hex, '@type': 'workidentifier'}
    identifier = {'uri': uri, 'creative_work': root_ref}
    identifier.update(identifier_ref)

    root_node = next(
        candidate for candidate in graph
        if candidate['@id'] == root_ref['@id']
        and candidate['@type'] == root_ref['@type']
    )
    root_node.setdefault('identifiers', []).append(identifier_ref)
    graph.append(identifier)
def test_benchmark(self, input, benchmark):
    """Pass ``IRILink().execute`` and ``input`` to the ``benchmark`` callable."""
    execute = IRILink().execute
    benchmark(execute, input)
def is_valid_iri(iri):
    """Return True if ``IRILink`` accepts ``iri``, False if it raises ``InvalidIRI``."""
    accepted = True
    try:
        IRILink().execute(iri)
    except InvalidIRI:
        accepted = False
    return accepted
def is_valid_iri(iri):
    """Check ``iri`` with ``IRILink``.

    Returns False for any value that is not a ``str``. For strings the link
    is executed and True is returned; any exception raised by the link is
    not caught here and propagates to the caller.
    """
    if isinstance(iri, str):
        IRILink().execute(iri)
        return True
    return False
def parse(self, _, parse, **kwargs):
    """Replace ``uri``, ``scheme`` and ``host`` on ``self`` with parsed values.

    Does nothing when ``parse`` is falsy. Otherwise ``self['uri']`` is run
    through ``IRILink`` and the result's ``IRI``, ``scheme`` and
    ``authority`` are stored under ``uri``, ``scheme`` and ``host``.
    The second positional argument and ``kwargs`` are ignored.
    """
    if not parse:
        return

    link_result = IRILink().execute(self['uri'])
    # (target key on self, source key in the link result), applied in order.
    for target, source in (('uri', 'IRI'), ('scheme', 'scheme'), ('host', 'authority')):
        self[target] = link_result[source]
def _parse(self, *args, **kwargs):
    """Consume the ``parse`` flag in ``self.attrs`` and normalize the URI if set.

    ``'parse'`` is always popped from ``self.attrs`` (``KeyError`` if it is
    missing). When the popped value is truthy, ``self.attrs['uri']`` is run
    through ``IRILink`` and the result's ``IRI``, ``scheme`` and
    ``authority`` are stored under ``uri``, ``scheme`` and ``host``.
    Positional and keyword arguments are ignored.
    """
    if not self.attrs.pop('parse'):
        return

    link_result = IRILink().execute(self.attrs['uri'])
    # (target key in attrs, source key in the link result), applied in order.
    for target, source in (('uri', 'IRI'), ('scheme', 'scheme'), ('host', 'authority')):
        self.attrs[target] = link_result[source]