Пример #1
0
def load_header(filepath, remote=False):
    """Parse only the ontology header of ``filepath`` into a new graph.

    The serialization is chosen from the file suffix: ``.ttl`` is read as
    turtle, anything else is assumed to be RDF/XML.

    Args:
        filepath: Local path, or a URL when ``remote`` is true.
        remote: When true the content is fetched with ``requests.get``
            instead of being read from disk.

    Returns:
        An ``OntGraph`` holding the header triples.  The graph is empty when
        the raw bytes contain no ``owl:Ontology`` marker, or when an XML
        document has no ``Ontology`` element directly under ``RDF``.

    Raises:
        FileNotFoundError: Deliberately not caught for local files.
    """
    oo = b'owl:Ontology'
    path = Path(filepath)
    if path.suffix == '.ttl':
        infmt = 'turtle'
    else:
        infmt = 'xml'  # FIXME assumption

    if remote:
        resp = requests.get(
            filepath
        )  # TODO nonblocking pull these out, fetch, run inner again until done
        raw = resp.text.encode()
    else:
        with open(filepath, 'rb') as f:  # do not catch FileNotFoundErrors
            raw = f.read()

    # Always bind the result so callers get an (empty) graph rather than an
    # UnboundLocalError when there is no ontology header at all.
    scratch = OntGraph()
    if oo not in raw:  # we only care if there are imports or an ontology iri
        return scratch

    if infmt == 'turtle':
        # The header is everything before the first '###' section marker;
        # partition() falls back to the whole document when there is none.
        data = raw.partition(b'###')[0]
    else:  # xml -- must match the value assigned to infmt above
        xml_tree = etree.parse(BytesIO(raw))
        xml_root = xml_tree.getroot()
        xml_ontology = xml_tree.xpath(
            "/*[local-name()='RDF']/*[local-name()='Ontology']")
        if not xml_ontology:
            # the marker only occurred somewhere other than a header element
            return scratch
        # Strip the document down to the root plus its Ontology element.
        xml_root.clear()
        xml_root.append(xml_ontology[0])
        data = etree.tostring(xml_root)

    scratch.parse(data=data, format=infmt)
    return scratch
Пример #2
0
def npokb():
    """Debug driver: remap temporary ids in the neuron ttl files onto the index.

    Loads the npokb index graph if its file exists, maps each generated
    neuron file against it, writes the index back and then drops into the
    debugger so the results can be inspected.
    """
    index_path = (auth.get_path('ontology-local-repo') /
                  'ttl/generated/neurons/npokb-index.ttl')
    index_graph = OntGraph(path=index_path)
    if index_graph.path.exists():
        index_graph.parse()

    # NOTE(review): this function is itself named `npokb`, so unless the name
    # is rebound elsewhere the `npokb` passed below is this function rather
    # than a namespace object -- confirm against the module's imports.
    # testing
    index_graph.bind('npokb', npokb)

    def remap(stem):
        # Parse one generated neuron file and map its temp ids to the index.
        source_path = auth.get_path(
            'ontology-local-repo') / f'ttl/generated/neurons/{stem}.ttl'
        source = OntGraph(path=source_path)
        source.parse()
        return source, source.mapTempToIndex(index_graph, npokb, TEMP)

    stems = ('common-usage-types', 'huang-2017', 'markram-2015',
             'allen-cell-types')
    ios = [remap(stem) for stem in stems]

    # Only the first input/output pair is compared; the names are kept short
    # because they are meant to be poked at from the debugger prompt.
    input_graph, output_graph = ios[0]
    a, r, c = output_graph.subjectsChanged(input_graph)
    index_graph.write()
    # output graphs are intentionally not written yet ("when ready")
    breakpoint()
Пример #3
0
def main():
    """Entry point: run ``test()`` and return ``None``.

    The statements that used to follow the unconditional ``return`` were
    unreachable and referred to an ``index_graph`` that is never defined in
    this function, so they have been removed.
    """
    # alternative entry point that is currently disabled: InterLexSneechenator()
    test()
Пример #4
0
    def triples(self):
        """Yield RDF triples for the entries of ``self.data['identifier_metadata']``.

        For a blob whose ``source`` is ``'Crossref'`` the DOI's own turtle
        record is parsed; when it contains a prism ``doi`` triple an
        ``owl:sameAs`` link to that record's subject and every triple of the
        record are yielded, followed by type/publisher/title/date triples
        built from the blob itself (``None`` values are skipped).

        Generation stops (plain ``return``) at the first blob that has no
        ``source``, has a source other than Crossref, or whose DOI yields no
        turtle data.
        NOTE(review): that also drops every later blob -- confirm that
        ``continue`` is not what is wanted.

        Raises:
            AttributeError: if the identifier has no ``asUri`` method.  (A
                leftover ``breakpoint()`` used to fire just before this.)
        """
        crossref_doi_pred = rdflib.term.URIRef('http://prismstandard.org/namespaces/basic/2.1/doi')
        for blob in self.data['identifier_metadata']:
            # named `ident` so the builtin `id` is not shadowed
            ident = blob['id']
            if not isinstance(ident, idlib.Stream):
                ident = idlib.Auto(ident)

            s = ident.asUri(rdflib.URIRef)
            if 'source' in blob:
                source = blob['source']  # FIXME we need to wrap this in our normalized representation
                if source == 'Crossref':  # FIXME CrossrefConvertor etc. OR put it in idlib as a an alternate ttl
                    pos = (
                        (rdf.type, owl.NamedIndividual),
                        (rdf.type, TEMP[blob['type']]),
                        (dc.publisher, blob['publisher']),
                        #(dc.type, blob['type']),  # FIXME semantify
                        (dc.title, blob['title']),
                        (dc.date, self.published_online(blob)),  # FIXME .... dangerzone
                    )
                    g = OntGraph()
                    doi = idlib.Doi(ident) if not isinstance(ident, idlib.Doi) else ident  # FIXME idlib streams need to recognize their own type in __new__
                    data = doi.ttl()
                    if data is None:  # blackfynn has some bad settings on their doi records ...
                        return

                    try:
                        g.parse(data=data, format='ttl')  # FIXME network bad
                    except Exception as e:
                        # Best effort: a bad record is logged and treated as
                        # having no crossref section below.  Exception (not
                        # BaseException) so KeyboardInterrupt and SystemExit
                        # still propagate.
                        loge.exception(e)

                    _tr = [rs for rs, rp, _ in g if rp == crossref_doi_pred]
                    if _tr:
                        _their_record_s = _tr[0]
                        yield s, owl.sameAs, _their_record_s
                        yield from g
                    else:
                        g.debug()
                        log.critical('No crossref doi section in graph!')
                else:
                    msg = f'dont know what to do with {source}'
                    log.error(msg)
                    #raise NotImplementedError(msg)
                    return
            else:
                msg = f'dont know what to do with {blob} for {ident.identifier}'
                log.error(msg)
                #raise NotImplementedError(msg)
                return

            for p, oraw in pos:
                if oraw is not None:
                    # plain python values become literals, URIRefs pass through
                    o = rdflib.Literal(oraw) if not isinstance(oraw, rdflib.URIRef) else oraw
                    yield s, p, o
Пример #5
0
    def triples(self):
        """Yield RDF triples for the entries of ``self.data['identifier_metadata']``.

        For a blob whose ``source`` is ``'Crossref'`` the DOI's turtle record
        is parsed; when it contains a prism ``doi`` triple an ``owl:sameAs``
        link to that record's subject plus every triple of the record are
        yielded.  A record without such a triple is logged as critical and
        skipped instead of raising ``IndexError``.  Type/publisher/title/date
        triples built from the blob follow in either case (``None`` values
        are skipped).

        Generation stops (plain ``return``) at the first blob that has no
        ``source`` or a source other than Crossref.
        """
        # Built once: this was previously reconstructed for every triple.
        crossref_doi_pred = rdflib.term.URIRef(
            'http://prismstandard.org/namespaces/basic/2.1/doi')
        for blob in self.data['identifier_metadata']:
            # named `ident` so the builtin `id` is not shadowed
            ident = blob['id']
            if not isinstance(ident, idlib.Stream):
                ident = idlib.Auto(ident)

            s = ident.asType(rdflib.URIRef)
            if 'source' not in blob:
                msg = f'dont know what to do with {blob} for {ident.identifier}'
                log.error(msg)
                #raise NotImplementedError(msg)
                return

            source = blob['source']  # FIXME we need to wrap this in our normalized representation
            if source != 'Crossref':  # FIXME CrossrefConvertor etc. OR put it in idlib as a an alternate ttl
                msg = f'dont know what to do with {source}'
                log.error(msg)
                #raise NotImplementedError(msg)
                return

            pos = (
                (rdf.type, owl.NamedIndividual),
                (rdf.type, TEMP[blob['type']]),
                (dc.publisher, blob['publisher']),
                #(dc.type, blob['type']),  # FIXME semantify
                (dc.title, blob['title']),
                (dc.date, self.published_online(blob)),  # FIXME .... dangerzone
            )
            g = OntGraph()
            # FIXME idlib streams need to recognize their own type in __new__
            doi = ident if isinstance(ident, idlib.Doi) else idlib.Doi(ident)
            g.parse(data=doi.ttl(), format='ttl')  # FIXME network bad
            record_subjects = [
                rs for rs, rp, _ in g if rp == crossref_doi_pred
            ]
            if record_subjects:
                yield s, owl.sameAs, record_subjects[0]
                yield from g
            else:
                log.critical('No crossref doi section in graph!')

            for p, oraw in pos:
                if oraw is not None:
                    # plain python values become literals, URIRefs pass through
                    o = oraw if isinstance(oraw, rdflib.URIRef) else rdflib.Literal(oraw)
                    yield s, p, o
Пример #6
0
def loadall(git_local, repo_name, local=False, dobig=False):
    """Load every turtle file under ``<git_local>/<repo_name>/ttl`` into one graph.

    Files up to three directory levels deep are parsed.  Unless ``local`` is
    true, ``owl:imports`` targets that have not been seen yet are then loaded
    as well, one layer per pass, for at most ten passes.

    Args:
        git_local: Directory that contains the repository checkout.
        repo_name: Name of the repository directory inside ``git_local``.
        local: When true, imports are not followed.
        dobig: Default for the inner ``repeat`` helper.
            NOTE(review): the driver loop always calls ``repeat(True)``, so
            this value is currently never the one in effect.

    Returns:
        The populated ``OntGraph``.
    """
    local_base = jpth(git_local, repo_name)
    lb_ttl = os.path.realpath(jpth(local_base, 'ttl'))

    # Basenames of the local files and iris of imports already handled.
    done = set()
    filenames = [
        f for g in ('*', '*/*', '*/*/*')
        for f in glob(lb_ttl + '/' + g + '.ttl')
    ]
    graph = OntGraph()
    for f in filenames:
        print(f)
        done.add(os.path.basename(f))
        graph.parse(f, format='turtle')

    def repeat(dobig=dobig):
        """Load one layer of unseen imports; return True if any was new."""
        progressed = False
        # Snapshot the import statements first: parsing below adds triples
        # to the very graph that would otherwise still be being iterated.
        for s, o in list(graph.subject_objects(owl.imports)):
            if os.path.basename(o) not in done and o not in done:
                print(o)
                done.add(o)
                progressed = True
                ext = os.path.splitext(o)[1]
                fmt = 'turtle' if ext == '.ttl' else 'xml'
                if noneMembers(o, *bigleaves) or dobig:
                    graph.parse(o, format=fmt)
        return progressed

    if not local:
        # We don't really know the import depth, so cap the number of passes,
        # but stop as soon as a pass finds nothing new: the graph is then
        # unchanged and every further pass would be a no-op.
        for _ in range(10):
            if not repeat(True):
                break

    return graph
Пример #7
0
def npokb_mapping():
    """Debug driver: stabilise and index the ids of the generated neuron files.

    For each generated neuron file the working-tree graph is first mapped
    against the graph at git ``HEAD`` (via ``mapStableIdentifiers``) and the
    result is then mapped onto the npokb index.  The index and every output
    graph are written, after which the debugger is entered.
    """
    index_path = (auth.get_path('ontology-local-repo') /
                  'ttl/generated/neurons/npokb-index.ttl')
    index_graph = OntGraph(path=index_path)
    if index_graph.path.exists():
        index_graph.parse()

    # testing
    index_graph.bind('npokb', npokb)

    stems = (
        'common-usage-types',
        'huang-2017',
        'markram-2015',
        'allen-cell-types',
    )
    ios = []
    for stem in stems:
        # FIXME if the index id is already being used it is still added as a temp id incorrectly
        ttl_path = auth.get_path(
            'ontology-local-repo') / f'ttl/generated/neurons/{stem}.ttl'
        # HEAD is the default ref, it is spelled out here only for clarity
        committed = OntResGit(ttl_path, ref='HEAD')
        prev_graph = committed.graph
        working = OntGraph(path=ttl_path)
        working.parse()
        stabilised = working.mapStableIdentifiers(prev_graph, ilxtr.origLabel)
        indexed = stabilised.mapTempToIndex(index_graph, npokb, TEMP)
        ios.append((stabilised, indexed))

    # Only the first pair is compared; short names are for the debugger prompt.
    mapped_graph, output_graph = ios[0]
    a, r, c = output_graph.subjectsChanged(mapped_graph)
    index_graph.write()
    for _, indexed in ios:  # when ready
        indexed.write()
    breakpoint()
Пример #8
0
    def inner(local_filepath, remote=False):
        """Process the import header of one ontology file, then recurse into its imports.

        Closure over state of the enclosing function (``oo``, ``p``,
        ``triples``, ``done``, ``local_base``, ``remote_base``, ``readonly``,
        ``revert``, ``dobig``, ``bigleaves``,
        ``imported_iri_vs_ontology_iri``) -- none of which is visible here.

        Args:
            local_filepath: Path, or url when ``remote`` is true, of the file.
            remote: Fetch the content with ``requests.get`` instead of
                opening it from disk.

        Side effects: adds to ``triples``, appends to ``done``, records
        mismatching iris in ``imported_iri_vs_ontology_iri`` and, unless
        ``readonly``, rewrites ``local_filepath`` on disk.
        """
        # presumably noneMembers is true when none of bigleaves occurs in the
        # path, i.e. big files are skipped unless dobig -- verify the helper
        if noneMembers(local_filepath, *bigleaves) or dobig:
            ext = os.path.splitext(local_filepath)[-1]
            if ext == '.ttl':
                infmt = 'turtle'
            else:
                log.info((ext, local_filepath))
                infmt = None
            if remote:
                resp = requests.get(
                    local_filepath
                )  # TODO nonblocking pull these out, fetch, run inner again until done
                raw = resp.text.encode()
            else:
                try:
                    with open(local_filepath, 'rb') as f:
                        raw = f.read()
                except FileNotFoundError as e:
                    if local_filepath.startswith('file://'):
                        log.info(
                            f'local_imports has already been run, skipping {local_filepath}'
                        )
                        return
                        #raise ValueError('local_imports has already been run') from e
                    else:
                        log.exception(
                            e
                        )  # TODO raise a warning if the file cannot be matched
                        # seems like good practice to have any imported ontology under
                        # version control so all imports are guaranteed to have good
                        # provenance and not split the prior information between the
                        # scigraph config and the repository, the repository remains
                        # the source of truth, load.yaml files can then pick a subset
                        # of the properly tracked files to load as they see fit, but
                        # not add to them (at least in pyontutils land)
                        raw = b''

            if oo in raw:  # we only care if there are imports or an ontology iri
                scratch = OntGraph()
                if infmt == 'turtle':
                    # header is the part before the first '###' marker
                    # NOTE(review): raises ValueError when the marker is absent
                    data, rest = raw.split(b'###', 1)
                elif infmt == None:  # assume xml
                    # reduce the document to its root plus the Ontology element
                    xml_tree = etree.parse(BytesIO(raw))
                    xml_root = xml_tree.getroot()
                    xml_ontology = xml_tree.xpath(
                        "/*[local-name()='RDF']/*[local-name()='Ontology']")
                    xml_root.clear()
                    xml_root.append(xml_ontology[0])
                    data = etree.tostring(xml_root)
                scratch.parse(data=data, format=infmt)
                for s in scratch.subjects(rdf.type, owl.Ontology):
                    triples.add((s, owl.sameAs, rdflib.URIRef(local_filepath)))
                    # somehow this breaks computing the chain
                    #for p in (rdfs.comment, skos.definition, definition, dc.title, rdfs.label):
                    #for o in scratch[s:p]:
                    #triples.add((s, p, o))
                # sorted() materialises the pairs, so mutating scratch inside
                # the loop does not disturb the iteration
                for s, o in sorted(scratch.subject_objects(p)):
                    if revert:
                        raise NotImplementedError('TODO')
                    # the same import with remote_base swapped for local_base
                    nlfp = o.replace(remote_base, local_base)
                    triples.add((s, p, o))
                    if 'http://' in local_filepath or 'external' in local_filepath:  # FIXME what to do about https used inconsistently :/
                        if 'external' in local_filepath:
                            imported_iri = rdflib.URIRef(
                                local_filepath.replace(
                                    local_base, remote_base))  # inefficient
                        else:
                            imported_iri = rdflib.URIRef(local_filepath)
                        if s != imported_iri:
                            imported_iri_vs_ontology_iri[
                                imported_iri] = s  # kept for the record
                            triples.add((imported_iri, p,
                                         s))  # bridge imported != ontology iri
                    if local_base in nlfp and 'file://' not in o:  # FIXME file:// should not be slipping through here...
                        # point the header at the local copy instead
                        scratch.add((s, p, rdflib.URIRef('file://' + nlfp)))
                        scratch.remove((s, p, o))
                    if nlfp not in done:
                        done.append(nlfp)
                        if local_base in nlfp and 'external' not in nlfp:  # skip externals TODO
                            inner(nlfp)
                        elif readonly:  # read external imports
                            if 'external' in nlfp:
                                inner(nlfp)
                            else:
                                inner(nlfp, remote=True)
                if not readonly:
                    # serialise the modified header and splice it back in
                    # front of the untouched remainder of the file
                    _orp = CustomTurtleSerializer.roundtrip_prefixes  # FIXME awful hack :/
                    CustomTurtleSerializer.roundtrip_prefixes = True
                    ttl = scratch.serialize(format='nifttl', encoding='utf-8')
                    CustomTurtleSerializer.roundtrip_prefixes = _orp
                    ndata, comment = ttl.split(b'###', 1)
                    # NOTE(review): `rest` is only bound in the turtle branch
                    # above, so this line fails for an xml input
                    out = ndata + b'###' + rest
                    with open(local_filepath, 'wb') as f:
                        f.write(out)
Пример #9
0
    def processData(cls):
        """Reduce ``cls.raw`` to the wanted elements and parse them into a graph.

        Keeps the ``Ontology`` element, every ``ObjectProperty``, the
        ``owl:Class`` elements whose first attribute value is in the id list
        from ``cls._id_src()`` and the classes that list one of the raw ids
        as ``hasAlternativeId``; when ``cls.more`` is truthy the classes
        reachable through ``subClassOf`` / ``someValuesFrom`` are kept too.
        The root of ``cls.raw`` is modified in place.

        Side effect: sets ``cls.iri`` to the ontology's ``owl:versionIRI``.

        Returns:
            ``(more, more_ids, g)``: the superclass elements that were
            found, the ids collected while walking them, and the graph.

        Raises:
            IndexError: when the reduced graph has no ``owl:versionIRI``.
        """
        owl_class_tag = '{http://www.w3.org/2002/07/owl#}Class'

        def first_attr_in(node, wanted_ids):
            # The first attribute value is used as the class identifier
            # (presumably rdf:about -- TODO confirm).  A node without any
            # attribute has nothing to match and is skipped, not an error.
            attr_values = node.values()
            return bool(attr_values) and attr_values[0] in wanted_ids

        ids_raw, ids = cls._id_src()
        tree = cls.raw
        r = tree.getroot()
        # list(element) replaces the deprecated Element.getchildren()
        cs = list(r)
        classes = [
            _ for _ in cs
            if _.tag == owl_class_tag and first_attr_in(_, ids)
        ]
        ontology = tree.xpath(
            "/*[local-name()='RDF']/*[local-name()='Ontology']")
        ops = tree.xpath(
            "/*[local-name()='RDF']/*[local-name()='ObjectProperty']")  # TODO
        rpl_check = tree.xpath(
            "/*[local-name()='RDF']/*[local-name()='Class']/*[local-name()='hasAlternativeId']"
        )
        rpl_dict = {
            _.text: _.getparent()
            for _ in rpl_check if _.text in ids_raw
        }  # we also need to have any new classes that have replaced old ids
        also_classes = list(rpl_dict.values())
        a = ontology + ops + classes + also_classes

        def rec(start_set, done):
            # Collect the ids referenced from start_set through subClassOf
            # and someValuesFrom, then recurse into the matching classes
            # that have not been visited yet.
            ids_ = set()
            for c in start_set:
                ids_.update([
                    _.items()[0][1] for _ in etree.ElementTree(c).xpath(
                        "/*[local-name()='Class']/*[local-name()='subClassOf']"
                    ) if _.items()
                ])
                ids_.update([
                    _.items()[0][1] for _ in etree.ElementTree(c).xpath(
                        "/*[local-name()='Class']/*[local-name()='subClassOf']/*[local-name()='Restriction']/*[local-name()='someValuesFrom']"
                    ) if _.items()
                ])
            supers = [
                _ for _ in cs
                if _.tag == owl_class_tag
                and first_attr_in(_, ids_) and _ not in done
            ]
            if supers:
                msup, more_ids = rec(supers, done + supers)
                supers += msup
                ids_.update(more_ids)
            return supers, ids_

        more, more_ids = rec(a, a)
        all_nodes = a
        if cls.more:
            all_nodes = a + more

        r.clear()  # wipe all the stuff we don't need
        # dict.fromkeys de-duplicates while keeping a stable order, so the
        # serialized document is the same from run to run
        for c in dict.fromkeys(all_nodes):
            r.append(c)
        data = etree.tostring(r)
        g = OntGraph()
        g.parse(
            data=data
        )  # now _this_ is stupidly slow (like 20 minutes of slow) might make more sense to do the xml directly?
        cls.iri = list(
            g.query(
                'SELECT DISTINCT ?match WHERE { ?temp rdf:type owl:Ontology . ?temp owl:versionIRI ?match . }'
            ))[0][0]
        return more, more_ids, g
Пример #10
0
def main():
    """Build the ``chebi-bridge`` ontology and strip ChEBI classes from the NIF files.

    Loads chebislim, chebi-dead, NIF-Chemical and NIF-Molecule from the
    local ontology repository, normalises them (ion fix-ups, replaced ids,
    alternative-id predicates), collects what the NIF files say about the
    wanted ChEBI ids beyond chebislim into a bridge ontology, removes
    curated and implied subClassOf statements from it and writes it.
    Finally the classes declared in chebislim/chebi-dead are deleted from
    the NIF-Chemical and NIF-Molecule graphs, which are written as well.

    Raises:
        FileNotFoundError: when the ontology repository or the resources
            folder does not exist.
    """
    olr = auth.get_path('ontology-local-repo')
    resources = auth.get_path('resources')
    if not olr.exists():
        raise FileNotFoundError(f'{olr} does not exist cannot continue')
    if not resources.exists():
        raise FileNotFoundError(f'{resources} does not exist cannot continue')

    # NOTE(review): 'oboInOwl' is listed twice below
    PREFIXES = makePrefixes('definition', 'replacedBy', 'hasRole', 'oboInOwl',
                            'CHEBI', 'owl', 'skos', 'oboInOwl')
    ug = makeGraph('utilgraph', prefixes=PREFIXES)
    file = resources / 'chebi-subset-ids.txt'
    with open(file.as_posix(), 'rt') as f:
        # ids_raw keeps the stripped lines as written, ids their expanded form
        ids_raw = set((_.strip() for _ in f.readlines()))
        ids = sorted(set((ug.expand(_.strip()) for _ in ids_raw)))

    def check_chebis(g):
        """Return, for each wanted id in order, how many triples ``g`` has with it as subject."""
        a = []
        for id_ in ids:
            l = sorted(g.triples((id_, None, None)))
            ll = len(l)
            a.append(ll)
        return a

    def fixIons(g):
        """Replace atom ids with the id of the corresponding ion in ``g``."""
        # there are a series of atom/ion confusions that shall be dealt with, solution is to add 'iron' as a synonym to the charged form since that is what the biologists are usually referring to...
        ng = makeGraph('', graph=g, prefixes=makePrefixes('CHEBI'))
        # atom           ion
        # (the bare tuple expressions below are no-ops kept as table rows)
        None, 'CHEBI:29108'  # calcium is ok
        ng.replace_uriref('CHEBI:30145', 'CHEBI:49713')  # lithium
        ng.replace_uriref('CHEBI:18248', 'CHEBI:29033')  # iron
        ng.replace_uriref('CHEBI:26216', 'CHEBI:29103')  # potassium
        ng.replace_uriref('CHEBI:26708', 'CHEBI:29101')  # sodium
        None, 'CHEBI:29105'  # zinc is ok

    # g accumulates everything; the other graphs hold one source file each
    g = OntGraph()
    cg = OntGraph()
    cd = OntGraph()
    chemg = OntGraph()
    molg = OntGraph()

    # a1..a4 are per-id triple counts taken after each source is merged in
    cg.parse(olr / 'ttl/generated/chebislim.ttl', format='turtle')
    list(g.add(t) for t in cg)
    a1 = check_chebis(g)

    cd.parse(olr / 'ttl/generated/chebi-dead.ttl', format='turtle')
    list(g.add(t) for t in cd)
    a2 = check_chebis(g)

    chemg.parse(olr / 'ttl/NIF-Chemical.ttl', format='turtle')
    chemgg = makeGraph('NIF-Chemical', graph=chemg)
    fixIons(chemg)
    list(g.add(t) for t in chemg)
    a3 = check_chebis(g)

    molg.parse(olr / 'ttl/NIF-Molecule.ttl', format='turtle')
    molgg = makeGraph('NIF-Molecule', graph=molg)
    fixIons(molg)
    list(g.add(t) for t in molg)
    a4 = check_chebis(g)

    replacedBy = ug.expand('replacedBy:')
    # dead id -> replacement id, taken from chebi-dead
    deads = {s: o for s, o in cd.subject_objects(replacedBy)}

    def switch_dead(g):
        """Swap every dead id in ``g`` for its replacement and record the old id on it."""
        ng = makeGraph('', graph=g, prefixes=makePrefixes('oboInOwl'))
        for f, r in deads.items():
            ng.replace_uriref(f, r)
            ng.add_trip(r, 'oboInOwl:hasAlternateId',
                        rdflib.Literal(f, datatype=rdflib.XSD.string))
            g.remove(
                (r, replacedBy, r))  # in case the replaced by was already in

    switch_dead(g)
    switch_dead(cg)
    switch_dead(chemg)
    switch_dead(molg)

    def fixHasAltId(g):
        """Rewrite ``NIFCHEM:hasAlternativeId`` to ``oboInOwl:hasAlternativeId`` in ``g``."""
        ng = makeGraph('',
                       graph=g,
                       prefixes=makePrefixes('oboInOwl', 'NIFCHEM', 'NIFRID'))
        ng.replace_uriref('NIFCHEM:hasAlternativeId',
                          'oboInOwl:hasAlternativeId')
        # ng.replace_uriref('NIFRID:ChEBIid', 'oboInOwl:id')  # :id does not exist, do we need an alternative?

    # NOTE(review): molg is not part of this tuple (nor of the one passed to
    # fixAltIdIsURIRef below) -- confirm that is intended
    list(map(fixHasAltId, (g, cg, chemg)))

    def fixAltIdIsURIRef(g):
        """Turn URIRef objects of ``oboInOwl:hasAlternativeId`` in ``g`` into qname string literals."""
        hai = ug.expand('oboInOwl:hasAlternativeId')
        # i = ug.expand('oboInOwl:id')  # :id does not exist
        makeGraph('', graph=g, prefixes=makePrefixes(
            'CHEBI'))  # amazingly sometimes this is missing...

        def inner(s, p, o):
            if type(o) == rdflib.URIRef:
                qn = g.namespace_manager.qname(o)
                g.add((s, p, rdflib.Literal(qn, datatype=rdflib.XSD.string)))
                if 'ns' in qn:
                    print('WARNING UNKNOWN NAMESPACE BEING SHORTENED', str(o),
                          qn)
                g.remove((s, p, o))

        # NOTE(review): inner() adds to and removes from g while g is being
        # iterated here -- confirm the store tolerates that
        for s, o in g.subject_objects(hai):
            inner(s, hai, o)
        #for s, o in g.subject_objects(i):  # :id does not exist
        #inner(s, i, o)

    list(map(fixAltIdIsURIRef, (g, cg, chemg)))

    # ids whose triple count changed between stages and that chebislim has
    matches = [_ for _ in zip(a1, a2, a3, a4)]
    changed = [len(set(_)) != 1 for _ in matches]
    review = [(id_, m) for id_, changed, m in zip(ids, changed, matches)
              if changed and m[0]]
    # for reasons currently lost to implementation details this returns a list of empty lists if run from ipython
    wat_c = [
        set([(s, str(o.toPython()))
             for s, p, o in cg.triples((u, None, None))]) for u, _ in review
    ]
    wat_a = [
        set([(s, str(o.toPython())) for s, p, o in g.triples((u, None, None))])
        for u, _ in review
    ]
    wat_c_ = [
        set(cg.triples((u, None, None))) for u, _ in review
    ]  # for reasons currently lost to implementation details this returns a list of empty lists if run from ipython
    wat_a_ = [
        set(g.triples((u, None, None))) for u, _ in review
    ]  # for reasons currently lost to implementation details this returns a list of empty lists if run from ipython
    # per reviewed id: what the merged graph says that chebislim does not
    diff = [a - c for a, c in zip(wat_a, wat_c)]
    diff_ = [a - c for a, c in zip(wat_a_, wat_c_)]

    cb = createOntology(
        'chebi-bridge',
        'NIF ChEBI bridge',
        makePrefixes('CHEBI', 'BFO1SNAP', 'owl', 'skos', 'dc', 'hasRole',
                     'NIFCHEM', 'oboInOwl', 'NIFMOL', 'NIFRID'),
        'chebibridge',
        ('This bridge file contains additional annotations'
         ' on top of CHEBI identifiers that were originally'
         ' included in NIF-Chemical or NIF-Molecule that have'
         ' not since been added to CHEBI upstream'),
        path='ttl/bridge/',
        #imports=('https://raw.githubusercontent.com/SciCrunch/NIF-Ontology/master/ttl/generated/chebislim.ttl',
        #'https://raw.githubusercontent.com/SciCrunch/NIF-Ontology/master/ttl/generated/chebi-dead.ttl'))
        imports=(
            'http://ontology.neuinfo.org/NIF/ttl/generated/chebislim.ttl',
            'http://ontology.neuinfo.org/NIF/ttl/generated/chebi-dead.ttl'))

    out = []
    for set_ in diff:
        for sub, string in sorted(set_):
            for t in g.triples((sub, None, None)):
                # please note that this process will do things like remove hasStreenName ectasy from CHEBI:1391 since chebislim has it listed as a synonym
                py = t[-1].toPython()
                if py == string and not py.startswith(
                        'ub'
                ):  # ignore restrictions... this is safe because nifmol and nifchem dont have any restrictions...
                    cb.add_recursive(t, g)
        # NOTE(review): when set_ is empty `sub` is unbound on the first
        # iteration, or still holds the value from the previous set_
        cb.add_class(
            sub
        )  # only need to go at the end because sub is the same for each set

    def hasImplicitSuperclass(s, o):
        """Return True when ``o`` is reachable from ``s`` via subClassOf in ``cg``, else None."""
        # NOTE(review): no visited set, so a subClassOf cycle in cg would
        # recurse without end
        for super_ in cg.objects(s, rdflib.RDFS.subClassOf):
            if super_ == o:
                return True
            elif hasImplicitSuperclass(super_, o):
                return True

    # curation decisions after review (see outtc for full list)
    curatedOut = []

    def curateOut(*t):
        """Record the expanded triple ``t`` in ``curatedOut`` and delete it from the bridge."""
        curatedOut.append(
            tuple(
                ug.expand(_) if type(_) is not rdflib.Literal else _
                for _ in t))
        cb.del_trip(*t)

    curateOut(
        'CHEBI:6887', 'rdfs:subClassOf', 'CHEBI:23367'
    )  # defer to the chebi choice of chemical substance over molecular entity since it is classified as a racemate which doesn't quite match the mol ent def
    curateOut(
        'CHEBI:26519', 'rdfs:subClassOf', 'CHEBI:24870'
    )  # some ions may also be free radicals, but all free radicals are not ions!
    #natural product removal since natural product should probably be a role if anything...
    curateOut('CHEBI:18059', 'rdfs:subClassOf', 'CHEBI:33243')
    curateOut('CHEBI:24921', 'rdfs:subClassOf', 'CHEBI:33243')
    curateOut('CHEBI:37332', 'rdfs:subClassOf', 'CHEBI:33243')

    curateOut('CHEBI:50906', 'rdfs:label',
              rdflib.Literal('Chemical role', datatype=rdflib.XSD.string)
              )  # chebi already has a chemical role...
    curateOut(
        'CHEBI:22586', 'rdfs:subClassOf', 'CHEBI:24432'
    )  # antioxidant is already modelled as a chemical role instead of a biological role, the distinction is that the biological roles affect biological processes/property, not chemical processes/property
    curateOut('CHEBI:22720', 'rdfs:subClassOf',
              'CHEBI:27171')  # not all children are bicyclic
    curateOut(
        'CHEBI:23447', 'rdfs:subClassOf', 'CHEBI:17188'
    )  # this one seems obviously false... all cyclic nucleotides are not nucleoside 5'-monophosphate...
    curateOut(
        'CHEBI:24922', 'rdfs:subClassOf', 'CHEBI:27171'
    )  # not all children are bicyclic, some may be poly, therefore removing
    curateOut(
        'CHEBI:48706', 'rdfs:subClassOf', 'CHEBI:33232'
    )  # removing since antagonist is more incidental and pharmacological role is more appropriate (as chebi has it)
    curateOut('CHEBI:51064', 'rdfs:subClassOf',
              'CHEBI:35338')  # removing since chebi models this with has part
    curateOut(
        'CHEBI:8247', 'rdfs:subClassOf', 'CHEBI:22720'
    )  # the structure is 'fused to' a benzo, but it is not a benzo, chebi has the correct
    #curateOut('CHEBI:9463', 'rdfs:subClassOf', 'CHEBI:50786')  # not sure what to make of this wikipedia says one thing, but chebi says another, very strange... not an anabolic agent?!??! wat no idea

    # review hold over subClassOf statements
    # intc: removed because chebislim already implies them; outtc: kept
    intc = []
    outtc = []
    # NOTE(review): triples are removed from cb.g while it is being
    # iterated -- confirm the store tolerates that
    for s, o in cb.g.subject_objects(rdflib.RDFS.subClassOf):
        if str(
                o
        ) == 'http://ontology.neuinfo.org/NIF/Backend/BIRNLex_annotation_properties.owl#_birnlex_retired_class' or str(
                o
        ) == 'http://ontology.neuinfo.org/nif/nifstd/readable/birnlexRetiredClass':
            # we need to remove any of the cases where deprecation was misused
            cb.g.remove((s, rdflib.RDFS.subClassOf, o))
        elif hasImplicitSuperclass(s, o):
            cb.g.remove((s, rdflib.RDFS.subClassOf, o))
            intc.append((s, rdflib.RDFS.subClassOf, o))
        else:
            outtc.append((s, rdflib.RDFS.subClassOf, o))

    def qname(trips):
        """Return ``trips`` with every term shortened to its qname in the bridge graph."""
        return tuple(
            tuple(cb.g.namespace_manager.qname(_) for _ in t) for t in trips)

    # print the kept subClassOf statements for manual review; `sgv` is not
    # defined in this function and must come from the enclosing module
    for a, p, b in sorted(qname(outtc)):
        if 'NIFMOL' in b:
            continue  # not considering cases where NIFMOL/NIFCHEM ids are used, that can come later
        s = sgv.findById(a)
        o = sgv.findById(b)
        if s is None or o is None:
            print(a, '=>', s)
            print(b, '=>', o)
        else:
            print(s['labels'], s['curie'])
            print('subClassOf')
            print(o['labels'], o['curie'])
            print((a, p, b))
        print('---------------------')

    cb.write(
    )  # re-add only the missing edges so that we can zap them from NIF-Molecule and NIF-Chemical (recurse is needed...)

    # validation
    diff2 = set(cb.g) - set(cg)
    diff3 = set(cb.g) - diff2  # should just be all the owl:Class entries
    # `-` binds tighter than `|`: (cb - chemg) | (cb - molg)
    diff4 = set(cb.g) - set(chemg) | set(cb.g) - set(molg)  # not informative
    diff5 = set(cb.g) - diff4  # not informative
    both = set(chemg) & set(
        molg)  # there is no overlap beyond the owl:Class declarations

    def getChebis(set_):
        """Keep the tuples of ``set_`` whose first element contains ``'CHEBI_'``."""
        return set(t for t in set_ if 'CHEBI_' in t[0])

    def nodt(graph):
        """Return the (subject, object) pairs of ``graph`` with literals reduced to plain strings."""
        return set((s, str(o) if type(o) is rdflib.Literal else o)
                   for s, p, o in graph)

    # cmc / mmc: ChEBI statements of NIF-Chemical / NIF-Molecule that are
    # not covered by the bridge, chebislim, chebi-dead, intc or curatedOut
    cmc = getChebis((((
        (nodt(chemg) - nodt(cb.g)) - nodt(cg)) - nodt(cd)) - nodt(intc)) -
                    nodt(curatedOut))
    cmc = sorted(t for s, o in cmc for t in chemg.triples((s, None, o)))
    mmc = getChebis((((
        (nodt(molg) - nodt(cb.g)) - nodt(cg)) - nodt(cd)) - nodt(intc)) -
                    nodt(curatedOut))
    mmc = sorted(t for s, o in mmc for t in molg.triples((s, None, o)))

    # remove chebi classes from nifchem and nifmol
    def remstuff(sources, targets):
        """Delete every owl:Class declared in any of ``sources`` from each of ``targets``."""
        for source in sources:
            for id_ in source.subjects(rdflib.RDF.type, rdflib.OWL.Class):
                for target in targets:
                    target.del_class(id_)

    remstuff((cg, cd), (chemgg, molgg))

    chemgg.write()
    molgg.write()

    # drop into the debugger only when run as a script, so the intermediate
    # values above (diff*, cmc, mmc, ...) can be inspected
    if __name__ == '__main__':
        breakpoint()