Example #1
    def __new__(cls, *args, **kwargs):
        if not hasattr(cls, 'existing'):
            e_config = Config('cut-development')
            e_config.load_existing()
            # FIXME clear use case for the remaining bound to whatever query produced it rather
            # than the other way around ... how to support this use case ...
            cls.existing = {n.origLabel.toPython():n for n in e_config.existing_pes}
            cls.existing.update({n.id_:n for n in e_config.existing_pes})
            cls.query = oq.OntQuery(oq.plugin.get('rdflib')(e_config.core_graph), instrumented=OntTerm)
            cls.sgv = Vocabulary()

        return super().__new__(cls)
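
Example #1 lazily initializes shared class-level state the first time any instance is constructed, so the expensive Config load and query setup run once per process rather than once per instance. A minimal standalone sketch of the same pattern (the shared dict is a stand-in for the Config/OntQuery setup; nothing here is pyontutils API):

class Cached:
    def __new__(cls, *args, **kwargs):
        # hasattr is False only on the very first construction
        if not hasattr(cls, 'shared'):
            cls.shared = {'expensive': 'state'}  # stands in for Config(...).load_existing()
        return super().__new__(cls)

a, b = Cached(), Cached()
assert a.shared is b.shared  # one copy, visible to every instance
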
Example #2
    def __new__(cls, *args, **kwargs):
        if not hasattr(cls, 'existing'):
            e_config = Config('common-usage-types')
            e_config.load_existing()
            # FIXME clear use case for the remaining bound to whatever query produced it rather
            # than the other way around ... how to support this use case ...
            cls.existing = {str(n.origLabel): n for n in e_config.neurons()}
            cls.query = oq.OntQuery(oq.plugin.get('rdflib')(e_config.core_graph), instrumented=OntTerm)
            cls.sgv = Vocabulary()

        return super().__new__(cls)
Example #3
import os
from collections import defaultdict, namedtuple
import rdflib
from rdflib import URIRef, RDFS, RDF, OWL
from rdflib.namespace import SKOS
import requests
from pyontutils.scigraph import Vocabulary, Graph
from pyontutils.utils import TODAY, async_getter, TermColors as tc
from pyontutils.scig import scigPrint
from pyontutils.hierarchies import creatTree, flatten
from pyontutils.core import devconfig, OntMeta, makePrefixes, makeGraph
from pyontutils.core import NIFRID, oboInOwl
from IPython import embed

sgg = Graph(cache=True)
sgv = Vocabulary(cache=True)

Query = namedtuple('Query', ['root', 'relationshipType', 'direction', 'depth'])

CON = oboInOwl.consider
DBX = oboInOwl.hasDbXref  # FIXME also behaves as objectProperty :/
AID = oboInOwl.hasAlternativeId
IRBC = NIFRID.isReplacedByClass

PREFIXES = makePrefixes(
    'UBERON',
    'ro',
    'owl',
    'skos',
)
NIFPREFIXES = makePrefixes(
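
The snippet is cut off mid-call, but the PREFIXES usage suggests makePrefixes returns a mapping from curie prefix to namespace IRI. A hedged sketch of how such a mapping would bind onto a plain rdflib graph (the literal namespace values are illustrative, not necessarily what makePrefixes produces):

import rdflib

PREFIXES = {'UBERON': 'http://purl.obolibrary.org/obo/UBERON_',
            'owl': 'http://www.w3.org/2002/07/owl#'}
g = rdflib.Graph()
for prefix, namespace in PREFIXES.items():
    g.bind(prefix, namespace)  # curies like UBERON:0000955 now serialize compactly
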
Example #4
#!/usr/bin/env python3
import os
import csv
import json
from pathlib import Path
from datetime import date
import rdflib
from rdflib.extras import infixowl
from pyontutils.core import makeGraph
from pyontutils.config import devconfig
from pyontutils.scigraph import Vocabulary
from pyontutils.namespaces import makePrefixes
from IPython import embed

v = Vocabulary()

PREFIXES = makePrefixes('ilx', 'owl', 'skos', 'NIFSTD', 'NIFRID', 'SAO', 'NIFEXT', 'NLXCELL')
PREFIXES.update({
    'HBP_CELL':'http://www.hbp.FIXME.org/hbp_cell_ontology/',
})


def expand(curie):
    prefix, suffix = curie.split(':')
    return rdflib.URIRef(PREFIXES[prefix] + suffix)


def ilx_get_start():
    with open(Path(devconfig.ontology_local_repo,
                   'interlex_reserved.txt').as_posix(), 'rt') as f:
        for line in f.readlines()[::-1]:  # go backward to find the first non-empty
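
This example is also truncated, but its expand helper is complete: split the curie on the colon and concatenate the namespace from PREFIXES with the suffix. Assuming the standard owl namespace is present in PREFIXES:

expand('owl:Class')
# -> rdflib.URIRef('http://www.w3.org/2002/07/owl#Class')
# note that a suffix containing ':' would break the two-value unpacking
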
Example #5
def sheet_to_neurons(values, notes_index, expect_pes):
    # TODO import existing ids to register by label
    sgv = Vocabulary()
    e_config = Config('common-usage-types')
    e_config.load_existing()
    query = oq.OntQuery(oq.plugin.get('rdflib')(e_config.core_graph), instrumented=OntTerm)
    # FIXME clear use case for the remaining bound to whatever query produced it rather
    # than the other way around ... how to support this use case ...
    existing = {str(n.origLabel):n for n in e_config.neurons()}
    def convert_header(header):
        if header.startswith('has'):  # FIXME use a closed namespace
            return ilxtr[header]
        else:
            return None

    def convert_other(header):
        if header == 'label':
            return rdfs.label
        elif header == 'curie':
            return rdf.type
        elif header == 'definition':
            return definition
        else:
            header = header.replace(' ', '_')
            return TEMP[header]  # FIXME

    def mapCell(cell, syns=False):
        search_prefixes = ('UBERON', 'CHEBI', 'PR', 'NCBITaxon', 'NCBIGene', 'ilxtr', 'NIFEXT', 'SAO', 'NLXMOL',
                           'BIRNLEX',)

        if ':' in cell and ' ' not in cell:
            log.debug(cell)
            if 'http' in cell:
                if cell.startswith('http'):
                    t = OntTerm(iri=cell)
                else:
                    return None, None  # garbage with http inline
            else:
                t = OntTerm(cell, exclude_prefix=('FMA',))  # FIXME need better error message in ontquery

            return t.u, t.label

        result = [r for r in sgv.findByTerm(cell, searchSynonyms=syns, prefix=search_prefixes)
                  if not r['deprecated']]
        #printD(cell, result)
        if not result:
            log.debug(f'{cell}')
            maybe = list(query(label=cell, exclude_prefix=('FMA',)))
            if maybe:
                qr = maybe[0]
                return qr.OntTerm.u, qr.label
            elif not syns:
                return mapCell(cell, syns=True)
            else:
                return None, None
        elif len(result) > 1:
            #printD('WARNING', result)
            result = select_by_curie_rank(result)
        else:
            result = result[0]

        return rdflib.URIRef(result['iri']), result['labels'][0]

    def lower_check(label, cell):
        return label not in cell and label.lower() not in cell.lower()  # have to handle comma sep case

    lnlu = {v:k for k, v in LogicalPhenotype.local_names.items()}
    def convert_cell(cell_or_comma_sep):
        #printD('CONVERTING', cell_or_comma_sep)
        for cell_w_junk in cell_or_comma_sep.split(','):  # XXX WARNING need a way to alert people to this
            cell = cell_w_junk.strip()
            if cell.startswith('(OR') or cell.startswith('(AND'):
                start, *middle, end = cell.split('" "')
                OPoperator, first = start.split(' "')
                operator = OPoperator[1:]
                operator = lnlu[operator]
                last, CP = end.rsplit('"')
                iris, labels = [], []
                for term in (first, *middle, last):
                    iri, label = mapCell(term)
                    if label is None:
                        label = cell_or_comma_sep
                    iris.append(iri)
                    labels.append(label)

                yield (operator, *iris), tuple(labels)

            else:
                iri, label = mapCell(cell)
                if label is None:
                    yield iri, cell_or_comma_sep  # FIXME need a way to handle this that doesn't break things?
                else:
                    yield iri, label

    config = Config('cut-roundtrip')
    skip = 'alignment label',
    headers, *rows = values
    errors = []
    new = []
    release = []
    for i, neuron_row in enumerate(rows):
        id = None
        label_neuron = None
        definition_neuron = None
        synonyms_neuron = None
        current_neuron = None
        phenotypes = []
        do_release = False
        predicate_notes = {}
        object_notes = {}
        other_notes = {}
        wat = {}
        for j, (header, cell) in enumerate(zip(headers, neuron_row)):
            notes = list(process_note(get_note(i + 1, j, notes_index)))  # + 1 since headers is removed
            if notes and not header.startswith('has'):
                _predicate = convert_other(header)
                if cell:
                    _object = rdflib.Literal(cell)  # FIXME curies etc.
                else:
                    _object = rdf.nil
                other_notes[_predicate, _object] = notes

            if header == 'curie':
                id = OntId(cell).u if cell else None
                continue
            elif header == 'label':
                label_neuron = cell
                if cell in existing:
                    current_neuron = existing[cell]
                elif cell:
                    # TODO
                    new.append(cell)
                else:
                    raise ValueError(cell)  # wat
                continue
            elif header == 'Status':
                # TODO
                if cell == 'Yes':
                    do_release = True
                elif cell == 'Maybe':
                    pass
                elif cell == 'Not yet':
                    pass
                elif cell == 'Delete':
                    pass
                else:
                    pass

                continue
            elif header == 'PMID':
                # TODO
                continue
            elif header == 'Other reference':
                # TODO
                continue
            elif header == 'Other label':
                # TODO
                continue
            elif header == 'definition':
                continue  # FIXME single space differences between the spreadsheet and the source

                if cell:
                    definition_neuron = rdflib.Literal(cell)

                continue

            elif header == 'synonyms':
                if cell:
                    synonyms_neuron = [rdflib.Literal(s.strip())
                                       # FIXME bare comma is extremely dangerous
                                       for s in cell.split(',')]

                continue
            elif header in skip:
                continue

            objects = []
            if cell:
                predicate = convert_header(header)
                if predicate is None:
                    log.debug(f'{(header, cell, notes)}')

                for object, label in convert_cell(cell):
                    if isinstance(label, tuple):  # LogicalPhenotype case
                        _err = []
                        for l in label:
                            if lower_check(l, cell):
                                _err.append((cell, label))
                        if _err:
                            errors.extend(_err)
                        else:
                            objects.append(object)
                    elif lower_check(label, cell):
                        errors.append((cell, label))
                    elif str(id) == object:
                        errors.append((header, cell, object, label))
                        object = None
                    else:
                        objects.append(object)

                if notes:
                    # FIXME this is a hack to only attach to the last value
                    # since we can't distinguish at the moment
                    wat[predicate, object] = notes
                    if object is not None:
                        # object aka iri can be none if we don't find anything
                        object_notes[object] = notes
                    else:
                        predicate_notes[predicate] = notes
                        # FIXME it might also be simpler in some cases
                        # to have this be object_notes[object] = notes
                        # because we are much less likely to have the same
                        # phenotype appear attached to the different dimensions

                        # FIXME comma sep is weak here because the
                        # reference is technically ambiguous
                        # might be an argument for the denormalized form ...
                        # or perhaps having another sheet for cases like that

            else:
                continue

            if predicate and objects:
                for object in objects:  # FIXME has layer location phenotype
                    if isinstance(object, tuple):
                        op, *rest = object
                        pes = (Phenotype(r, predicate) for r in rest)  # FIXME nonhomogenous phenotypes
                        phenotypes.append(LogicalPhenotype(op, *pes))
                    elif object:
                        phenotypes.append(Phenotype(object, predicate))
                    else:
                        errors.append((object, predicate, cell))
            elif objects:
                errors.append((header, objects))
            else:
                errors.append((header, cell))
            # translate header -> predicate
            # translate cell value to ontology id

        if current_neuron and phenotypes:
            # TODO merge current with changes
            # or maybe we just replace since all the phenotypes should be there?
            log.debug(phenotypes)
            if id is not None:
                log.debug(f'{(id, bool(id))}')

            elif label_neuron:
                id = make_cut_id(label_neuron)

            if id not in expect_pes:
                log.error(f'{id!r} not in cuts!?')
                continue

            if expect_pes[id] != len(phenotypes):
                log.error(f'{id!r} failed roundtrip {len(phenotypes)} != {expect_pes[id]}')
                continue

            neuron = NeuronCUT(*phenotypes, id_=id, label=label_neuron,
                               override=bool(id) or bool(label_neuron))
            neuron.adopt_meta(current_neuron)
            # FIXME occasionally this will error?!
        else:
            continue  # FIXME this pollutes everything ???
            fn = fixname(label_neuron)
            if not phenotypes and i:  # i skips header
                errors.append((i, neuron_row))  # TODO special review for phenos but not current
                phenotypes = Phenotype('TEMP:phenotype/' + fn),

            neuron = NeuronCUT(*phenotypes,
                               id_=make_cut_id(label_neuron),
                               label=label_neuron, override=True)

        # update the meta if there were any changes
        if definition_neuron is not None:
            neuron.definition = definition_neuron

        if synonyms_neuron is not None:
            neuron.synonyms = synonyms_neuron

        try:
            neuron.batchAnnotateByObject(object_notes)
            neuron.batchAnnotate(other_notes)
        except AttributeError as e:
            #embed()
            log.exception(e)  # 'something very strange has happened'
            pass  # FIXME FIXME FIXME

        #neuron.batchAnnotateByPredicate(predicate_notes)  # TODO
        # FIXME doesn't quite work in this context, but there are other
        # cases where annotations to the general modality are still desirable
        # FIXME there may be no predicate? if the object fails to match?

        if do_release:
            release.append(neuron)

    return config, errors, new, release
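
convert_cell above handles logical cells like (OR "a" "b") with a chain of string splits. A self-contained sketch of just that parsing step, using made-up labels and no ontology lookups:

def parse_logical(cell):
    # mirrors the split logic in convert_cell: '(OR "a" "b")' -> ('OR', ['a', 'b'])
    start, *middle, end = cell.split('" "')
    op_operator, first = start.split(' "')
    operator = op_operator[1:]      # drop the leading '('
    last, _close = end.rsplit('"')  # drop the trailing ')'
    return operator, [first, *middle, last]

assert parse_logical('(OR "basket cell" "chandelier cell")') == (
    'OR', ['basket cell', 'chandelier cell'])
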
Example #6
def main():
    resources = auth.get_path('resources')
    if not resources.exists():
        raise FileNotFoundError(f'{resources} does not exist cannot continue')

    with open((auth.get_path('git-local-base') /
               'entity_mapping/mappings/uberon-nervous').as_posix(),
              'rt') as f:
        brain_only = set([l.strip() for l in f.readlines()])

    sgv = Vocabulary(cache=True)
    sgg = Graph(cache=True)

    g = rdflib.Graph()
    g.parse((auth.get_path('ontology-local-repo') /
             'ttl/generated/parcellation/cocomacslim.ttl').as_posix(),
            format='turtle')
    sos = [so for so in g.subject_objects(rdflib.RDFS.label)]

    map_ = []
    smap_ = []
    fma_lookup = {}
    for s, o in sos:
        cc_id = g.qname(s)
        cc_label = o.toPython()
        existing_id = None
        existing_label = None
        existing_fma = ''
        s_existing_id = None
        s_existing_label = None
        s_existing_fma = ''

        cands = sgv.findByTerm(o)
        if not cands:
            cands = []
            scands = sgv.searchByTerm(o)
            if not scands:
                scands = []
        else:
            scands = []

        for cand in cands:
            existing_fma = ''
            if 'curie' in cand:
                existing_id = cand['curie']
            elif 'cocomac' in cand['iri']:
                continue
            else:
                raise ValueError(f'What is this thing? {cand["iri"]}')

            existing_label = cand['labels'][0]
            if existing_id.startswith('UBERON'):
                if existing_id not in brain_only:
                    existing_id = None
                    existing_label = None
                    existing_fma = ''
                else:
                    if existing_id in fma_lookup:
                        existing_fma = fma_lookup[existing_id]
                    else:
                        meta = sgg.getNode(existing_id)['nodes'][0]['meta']
                        if dbx in meta:
                            xrefs = meta[dbx]
                            for ref in xrefs:
                                if ref.startswith('FMA:'):
                                    existing_fma += ref
                        fma_lookup[existing_id] = existing_fma
                    break
            #elif cand['curie'].startswith('NIFGA'):
            #elif cand['curie'].startswith('MBA'):

        if existing_id:
            map_.append(
                (cc_label, cc_id, existing_label, existing_id, existing_fma))

        for scand in scands:

            if 'cocomac' in scand['iri']:
                continue
            elif not scand['curie']:
                continue  # good old remove the key instead of set it to None

            s_existing_fma = ''
            if scand['curie'].startswith('UBERON'):
                if scand['curie'] in brain_only:
                    s_existing_id = scand['curie']
                    s_existing_label = scand['labels'][0]
                    if not s_existing_id:
                        print(scand)
                        continue
                    asdf = sgg.getNode(s_existing_id)
                    #print(asdf, s_existing_id, s_existing_label)
                    if s_existing_id in fma_lookup:
                        s_existing_fma = fma_lookup[s_existing_id]
                    else:
                        meta = asdf['nodes'][0]['meta']
                        if dbx in meta:
                            xrefs = meta[dbx]
                            for ref in xrefs:
                                if ref.startswith('FMA:'):
                                    s_existing_fma += ref
                        fma_lookup[s_existing_id] = s_existing_fma
                    smap_.append((cc_label, cc_id, s_existing_label,
                                  s_existing_id, s_existing_fma))
                #break  # FOW :/

    _ = [
        print(a) for a in sorted(smap_, key=lambda a: int(a[1].split(':')[1]))
    ]
    with open('/tmp/coco_uber_match.csv', 'wt') as f:
        writer = csv.writer(f)
        writer.writerows(map_)
    with open('/tmp/coco_uber_search.csv', 'wt') as f:
        writer = csv.writer(f)
        writer.writerows(smap_)

    # cocomac -> integrated connectivity terminology mapping

    def lnc(string):
        return string.lower().replace(',', ' ')  # matches the conv in NIF_conn

    ccslim = rdflib.Graph().parse(
        (auth.get_path('ontology-local-repo') /
         'ttl/generated/parcellation/cocomacslim.ttl').as_posix(),
        format='turtle')
    coco_all = [l for l in ccslim.objects(None, rdflib.RDFS.label)]

    intcon = resources / 'NIF_conn_allcols_minimal_clean_filtered2.csv'
    with open(intcon.as_posix(), 'rt') as f:
        ber_rows = [r for r in csv.reader(f)]

    ber_set = set(
        [c for c in zip(*[r for r in ber_rows if r[0] == 'CoCoMac'])][1])
    coco_match_lower_no_comma = set(
        [lnc(t) for t in [c for c in zip(*map_)][0]])
    if smap_:
        coco_search_lower_no_comma = set(
            [lnc(t) for t in [c for c in zip(*smap_)][0]])
    else:
        coco_search_lower_no_comma = set()
    coco_all_lower_no_comma = set([lnc(t) for t in coco_all])
    matched = ber_set.intersection(coco_match_lower_no_comma)
    searched = ber_set.intersection(coco_search_lower_no_comma)
    alled = ber_set.intersection(coco_all_lower_no_comma)
    unmapped = alled.difference(matched.union(searched))
    missing = ber_set.difference(alled)

    nmatch = len(matched)
    nsearch = len(searched)
    nall = len(alled)
    nunmapped = len(unmapped)
    nmissing = len(missing)

    print('# matched =', nmatch)
    print('# searched =', nsearch)
    print('# alled =', nall)
    print('# unmapped =', nunmapped)
    print('# missing =', nmissing)

    print('missing')
    for m in sorted(missing):
        print(m)

    print('unmapped')
    for m in sorted(unmapped):
        print(m)
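
The report at the end of this example is plain set arithmetic over lowercased, comma-stripped labels. A toy version with invented region names shows how the five counts relate:

ber_set = {'v1', 'v2', 'lgn', 'unknown region'}  # labels from NIF_conn (invented here)
matched = ber_set & {'v1'}                       # found via findByTerm
searched = ber_set & {'v2'}                      # found via searchByTerm
alled = ber_set & {'v1', 'v2', 'lgn'}            # present in cocomacslim at all
unmapped = alled - (matched | searched)          # in cocomac, mapped neither way
missing = ber_set - alled                        # absent from cocomac entirely
assert unmapped == {'lgn'} and missing == {'unknown region'}
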
Example #7
def main():
    DB_URI = 'mysql+mysqlconnector://{user}:{password}@{host}:{port}/{db}'
    if socket.gethostname() != 'orpheus':
        config = mysql_conn_helper('localhost', 'nif_eelg', 'nif_eelg_secure', 33060)  # see .ssh/config
    else:
        config = mysql_conn_helper('nif-mysql.crbs.ucsd.edu', 'nif_eelg', 'nif_eelg_secure')
    engine = create_engine(DB_URI.format(**config), echo=True)
    config = None
    del config

    insp = inspect(engine)
    terms = [c['name'] for c in insp.get_columns('terms')]
    term_existing_ids = [c['name'] for c in insp.get_columns('term_existing_ids')]
    #breakpoint()
    #sys.exit()

    query = engine.execute('SELECT * FROM term_existing_ids as teid JOIN terms as t ON t.id = teid.tid WHERE t.type != "cde"')
    header = term_existing_ids + terms

    data = query.fetchall()
    cdata = list(zip(*data))

    def datal(head):
        return cdata[header.index(head)]

    ilx_labels = {ilxb[ilx_fragment]:label for ilx_fragment, label in zip(datal('ilx'), datal('label'))}

    mapping_no_sao = [p for p in zip(datal('iri'), datal('ilx')) if 'neuinfo' in p[0]]  # 9446
    mapping = [p for p in zip(datal('iri'), datal('ilx')) if 'neuinfo' in p[0] or '/sao' in p[0]]  # 9883
    done = [ilx for iri, ilx in mapping]
    obo_mapping = [p for p in zip(datal('iri'), datal('ilx')) if 'obolibrary' in p[0] and p[1] not in done]
    done = done + [ilx for iri, ilx in obo_mapping]
    db_mapping = [p for p in zip(datal('iri'), datal('ilx')) if 'drugbank' in p[0] and p[1] not in done]
    done = done + [ilx for iri, ilx in db_mapping]
    t3db_mapping = [p for p in zip(datal('iri'), datal('ilx')) if 't3db' in p[0] and p[1] not in done]
    done = done + [ilx for iri, ilx in t3db_mapping]

    wiki_mapping = [p for p in zip(datal('iri'), datal('ilx')) if 'neurolex' in p[0] and p[1] not in done]

    sao_mapping = {o.toPython():s for s, o in Graph().parse((gitf / 'nlxeol/sao-nlxwiki-fixes.ttl').as_posix(), format='ttl').subject_objects(oboInOwl.hasAlternativeId)}

    scr = Graph().parse((gitf / 'NIF-Ontology/scicrunch-registry.ttl').as_posix(), format='turtle')
    moved_to_scr = {}
    #PROBLEM = set()
    for s, o in scr.subject_objects(oboInOwl.hasDbXref):
        if 'SCR_' in o:
            print(f'WARNING Registry identifier listed as alt id! {s} hasDbXref {o}')
            continue
        uri = NIFSTD[o]
        #try:
        assert uri not in moved_to_scr, f'uh oh {uri} was mapped to more than one registry entry! {s} {moved_to_scr[uri]}'
        #except AssertionError:
            #PROBLEM.add(uri)

        moved_to_scr[uri] = s

    to_scr = [(k, v) for k, v in moved_to_scr.items()
           if noneMembers(k, 'SciEx_', 'OMICS_', 'rid_', 'SciRes_',
                          'biodbcore-', 'C0085410', 'doi.org', 'C43960',
                          'doi:10.', 'GAZ:',
                          # 'birnlex_', 'nlx_', 'nif-'
                         )]

    replacement_graph = createOntology(filename='NIFSTD-ILX-mapping',
                        name='NLX* to ILX equivalents',
                        prefixes=makePrefixes('ILX'),)

    scr_rep_graph = createOntology(filename='NIFSTD-SCR-mapping',
                                   name='NLX* to SCR equivalents',
                                   prefixes=makePrefixes('SCR'),)

    _existing = {}
    def dupes(this, other, set_, dupes_):
        if this not in set_:
            set_.add(this)
            _existing[this] = other
        elif _existing[this] != other:
            dupes_[this].add(_existing[this])
            dupes_[this].add(other)

    iri_done = set()
    ilx_done = set()
    iri_dupes = defaultdict(set)
    ilx_dupes = defaultdict(set)
    def check_dupes(iri, ilx):
        dupes(iri, ilx, iri_done, iri_dupes)
        dupes(ilx, iri, ilx_done, ilx_dupes)

    BIRNLEX = Namespace(uPREFIXES['BIRNLEX'])
    trouble = [  # some are _2 issues :/
               # in interlex -- YES WE KNOW THEY DONT MATCH SOME IDIOT DID THIS IN THE PAST
               BIRNLEX['1006'],  # this one appears to be entirely novel despite a note that it was created in 2006...
               BIRNLEX['1152'],  # this was used in uberon ;_;
               BIRNLEX['2476'],  # can be owl:sameAs ed -> _2 version
               BIRNLEX['2477'],  # can be owl:sameAs ed -> _2 version
               BIRNLEX['2478'],  # can be owl:sameAs ed -> _2 version
               BIRNLEX['2479'],  # can be owl:sameAs ed -> _2 version
               BIRNLEX['2480'],  # can be owl:sameAs ed -> _2 version
               BIRNLEX['2533'],  # This is in interlex as a wiki id http://uri.interlex.org/base/ilx_0109349 since never used in the ontology, we could add it to the list of 'same as' for cosmetic purposes which will probably happen...
               BIRNLEX['3074'],  # -> CHEBI:26848  # add to slim and bridge...
               BIRNLEX['3076'],  # -> CHEBI:26195  # XXX when we go to load chebi make sure we don't dupe this...
    ]

    aaaaaaaaaaaaaaaaaaaaaaaaaaaaa = [t + '_2' for t in trouble]  # _never_ do this

    # TODO check for cases where there is an ilx and scr for the same id >_<

    sao_help = set()
    for iri, ilx_fragment in chain(mapping, to_scr):  # XXX core loop
        if iri in sao_mapping:
            uri = sao_mapping[iri]
            sao_help.add(uri)
        else:
            uri = URIRef(iri)

        if uri in trouble:
            #print('TROUBLE', iri, ilxb[ilx_fragment])
            print('TROUBLE', ilxb[ilx_fragment])

        if uri in moved_to_scr:  # TODO I think we need to have _all_ the SCR redirects here...
            s, p, o = uri, ilxtr.hasScrId, moved_to_scr[uri]
            scr_rep_graph.g.add((s, p, o))
        else:
            s, p, o = uri, ilxtr.hasIlxId, ilxb[ilx_fragment]
            #s, p, o = o, ilxtr.ilxIdFor, s
            replacement_graph.g.add((s, p, o))

        check_dupes(s, o)

    dupes = {k:v for k, v in iri_dupes.items()}
    idupes = {k:v for k, v in ilx_dupes.items()}
    assert not dupes, f'there are duplicate mappings for an external id {dupes}'
    #print(ilx_dupes)  # there are none yet

    ng = cull_prefixes(replacement_graph.g, prefixes=uPREFIXES)
    ng.filename = replacement_graph.filename

    sng = cull_prefixes(scr_rep_graph.g, prefixes=uPREFIXES)
    sng.filename = scr_rep_graph.filename


    _ = [print(k.toPython(), ' '.join(sorted(ng.qname(_.toPython()) for _ in v))) for k, v in idupes.items()]

    # run `resolver_uris = sorted(set(e for t in graph for e in t if 'uri.neuinfo.org' in e))` on a graph with everything loaded to get this file...
    resources = Path(__file__).resolve().absolute().parent / 'resources'
    with open((resources / 'all-uri.neuinfo.org-uris.pickle').as_posix(), 'rb') as f:
        all_uris = pickle.load(f)  # come in as URIRefs...
    with open((resources / 'all-uri.neuinfo.org-uris-old.pickle').as_posix(), 'rb') as f:
        all_uris_old = pickle.load(f)  # come in as URIRefs...
    with open((resources / 'all-uri.neuinfo.org-uris-old2.pickle').as_posix(), 'rb') as f:
        all_uris_old2 = pickle.load(f)  # come in as URIRefs...

    resolver_uris = set(e for t in chain(ng.g, sng.g) for e in t if 'uri.neuinfo.org' in e)
    ilx_only = resolver_uris - all_uris  # aka nlxonly
    resolver_not_ilx_only = resolver_uris - ilx_only
    problem_uris = all_uris - resolver_uris
    old_uris = all_uris_old - all_uris
    old_uris2 = all_uris_old2 - all_uris
    dold_uris = all_uris_old - all_uris_old2

    #idold_uris = all_uris_old2 - all_uris_old  # empty as expected
    #nxrefs = Graph().parse((gitf / 'NIF-Ontology/ttl/generated/nlx-xrefs.ttl').as_posix(), format='turtle')
    nxrefs = Graph().parse((gitf / 'nlxeol/nlx-xrefs.ttl').as_posix(), format='turtle')
    xrefs_uris = set(e for t in nxrefs for e in t if 'uri.neuinfo.org' in e)
    test_old_uris = old_uris2 - xrefs_uris

    diff_uris = test_old_uris - ilx_only
    #diff_uris.remove(URIRef('http://uri.neuinfo.org/nif/nifstd/nlx_149160'))  # ORNL was included in an old bad version of the xrefs file and was pulled in in the old all-uris  # now dealt with by the scr mapping
    diff_uris.remove(URIRef('http://uri.neuinfo.org/nif/nifstd/nlx_40280,birnlex_1731'))  # one of the doubled neurolex ids
    diff_uris.remove(URIRef('http://uri.neuinfo.org/nif/nifstd'))  # i have zero idea how this snuck in
    assert not diff_uris, 'old uris and problem uris should be identical'

    _ilx = set(e for t in ng.g for e in t)
    _scr = set(e for t in sng.g for e in t)
    for uri in ilx_only:
        if uri in _ilx and uri in _scr:
            raise BaseException('AAAAAAAAAAAAAAAAAAAAAAAAAAAAA')
        elif uri in _ilx:
            g = ng.g
        elif uri in _scr:
            g = sng.g
        else:
            raise BaseException('????????????')
        g.add((uri, ilxtr.isDefinedBy, URIRef('http://neurolex.org')))

    # XXX write the graphs
    ng.write()
    sng.write()

    nsuris = set(uri for uri, ilx in mapping_no_sao)
    auris = set(_.toPython() for _ in all_uris)
    iuris = set(_.toPython() for _ in resolver_uris)
    #sao_missing = iuris - nsuris  # now fixed and cannot run due to addition of scr ids to resolver_uris
    #assert not sao_missing, f'whoops {sao_missing}'
    ilx_missing = auris - iuris
    all_missing = iuris - auris
    #assert not all_missing, f'all is not all! {all_missing}'  # XXX have to deal with ilx_only separately as NLX-ILX or something

    # fixed
    #sao_add = {o.toPython():s.toPython() for s, p, o in ng.g if s.toPython() in sao_missing}
    #assert len(sao_add) == len(sao_missing), 'EEEEEEEEEEEEEEE'
    #with open('/tmp/please-add-these-sao-ids-as-existing-ids-to-the-listed-interlex-record.json', 'wt') as f:
        #json.dump(sao_add, f, indent=2)

    to_review = sorted(ilx_missing)

    # not relevant anymore
    #with open('thought-to-be-missing.json', 'rt') as f:
        #thought_to_be_missing = json.load(f)

    # from troy has issues
    #with open('nifext-duplicates-and-new.json', 'rt') as f:
        #nifext_data = json.load(f)

    #nifext_dupes = {v['current_nifext_id']:v['dropped_nifext_ids'][-1] if v['dropped_nifext_ids'] else None for v in nifext_data.values()}

    sgv = Vocabulary(cache=True)
    trts = [(v, (sgv.findById(v)['labels'][0]
                 if sgv.findById(v)['labels']
                 else '<--NO-LABEL-->')
             if sgv.findById(v)
             else '<------>')
            for v in to_review]

    sgg = sGraph(cache=True)
    SGG = Namespace(sgg._basePath.rstrip('/') + '/graph/')
    rg = Graph().parse((gitf / 'NIF-Ontology/ttl/unused/NIF-Retired.ttl').as_posix(), format='turtle')
    retired = set(e.toPython() for t in rg for e in t if 'uri.neuinfo.org' in e)
    retfile = '<ttl/unused/NIF-Retired.ttl>'
    help_graph = createOntology(filename='NIFSTD-BLACKHOLE-mapping',
                        name='HELPPPPPPPP!!!!',
                        prefixes=uPREFIXES,)
    def make_rt(to_review_tuples, retired=retired):
        def inner(u, l, retired=retired):
            ne = sgg.getNeighbors(u, relationshipType="isDefinedBy", depth=1)
            if ne:
                curie = help_graph.qname(u)
                help_graph.g.add((URIRef(u), ilxtr.SciGraphLookup, URIRef(f'http://scigraph.olympiangods.org/scigraph/graph/{curie}')))
            if ne and ne['edges']:
                src = ' '.join([f'<{e["obj"]}>' for e in ne["edges"]])
            elif u in retired:
                src = retfile
            else:
                src = '<>'
            return f'{u:<70} {l:<50} {src}'
        out = Async(rate=3000)(deferred(inner)(u, l) for u, l in sorted(to_review_tuples, key=lambda a:a[-1]))
        return '\n'.join(out)

    review_text = make_rt(trts)
    trts2 = [(u, l) for u, l in trts if 'nifext' not in u]
    not_nifext = make_rt(trts2)

    hng = cull_prefixes(help_graph.g, prefixes=uPREFIXES)
    hng.filename = help_graph.filename
    hng.write()

    ###
    #   Accounting of uri.neuinfo.org ids that do not resolve
    ###

    not_in_interlex = set(s for s, o in hng.g.subject_objects(ilxtr.SciGraphLookup))
    bh_deprecated = set(s for s in hng.g.subjects() if sgv.findById(s) and sgv.findById(s)['deprecated'])
    bh_not_deprecated = set(s for s in hng.g.subjects() if sgv.findById(s) and not sgv.findById(s)['deprecated'])
    bh_nifexts = set(s for s in bh_not_deprecated if 'nifext' in s)
    bh_readable = set(s for s in bh_not_deprecated if 'readable' in s)
    unaccounted = not_in_interlex - bh_readable - bh_nifexts - bh_deprecated
    namedinds = set(s for s in unaccounted
                    if sgv.findById(s) and
                    sgg.getNode(s)['nodes'][0]['meta']['types'] and
                    sgg.getNode(s)['nodes'][0]['meta']['types'][0] == 'NamedIndividual')
    unaccounted = unaccounted - namedinds
    ual = sorted(o for s in unaccounted for o in hng.g.objects(s, ilxtr.SciGraphLookup))
    report = (
        f'Total       {len(not_in_interlex)}\n'
        f'deprecated  {len(bh_deprecated)}\n'
        f'nd nifext   {len(bh_nifexts)}\n'
        f'nd readable {len(bh_readable)}\n'
        f'nd namedind {len(namedinds)}\n'
        f'unaccounted {len(unaccounted)}\n'
             )
    print(report)

    def reverse_report():
        ilx = Graph()
        ilx.parse('/tmp/interlex.ttl', format='turtle')
        not_in_ontology = set()
        annotations = set()
        relations = set()
        drugbank = set()
        t3db = set()
        for subject in ilx.subjects(rdf.type, owl.Class):
            ok = False
            for object in ilx.objects(subject, oboInOwl.hasDbXref):
                if anyMembers(object, 'uri.neuinfo.org', 'GO_', 'CHEBI_', 'PR_',
                              'PATO_', 'HP_', 'OBI_', 'DOID_', 'COGPO_', 'CAO_',
                              'UBERON_', 'NCBITaxon_', 'SO_', 'IAO_'):
                    # FIXME do we really import HP?
                    ok = True

                if (subject, rdf.type, owl.AnnotationProperty) in ilx:  # FIXME for troy these need to be cleared up
                    annotations.add(subject)
                elif (subject, rdf.type, owl.ObjectProperty) in ilx:
                    relations.add(subject)
                elif 'drugbank' in object:
                    drugbank.add(subject)
                elif 't3db.org' in object:
                    t3db.add(subject)

            if not ok:
                not_in_ontology.add(subject)


        drugbank = drugbank & not_in_ontology
        t3db = t3db & not_in_ontology
        annotations = annotations & not_in_ontology
        relations = relations & not_in_ontology
        unaccounted = not_in_ontology - drugbank - t3db - annotations - relations
        report = (
            f'Total       {len(not_in_ontology)}\n'
            f'annotations {len(annotations)}\n'
            f'relations   {len(relations)}\n'
            f'drugbank    {len(drugbank)}\n'
            f't3db        {len(t3db)}\n'
            f'unaccounted {len(unaccounted)}\n'
        )
        print(report)
        return (not_in_ontology, drugbank, unaccounted)

    _, _, un = reverse_report()

    h_uris = set(e for t in hng.g for e in t if 'uri.neuinfo.org' in e)
    real_problems = problem_uris - h_uris

    ###
    #   Missing neurons
    ###

    with open((gitf / 'nlxeol/neuron_data_curated.csv').as_posix()) as f:
        r = csv.reader(f)
        nheader = next(r)
        rows = list(r)

    ndata = list(zip(*rows))

    def datan(head):
        return ndata[nheader.index(head)]

    if __name__ == '__main__':
        breakpoint()
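
One pattern worth pulling out of Example #7 is the columnar access behind datal (and datan): transpose the row-oriented query results once with zip, then fetch whole columns by header name. A toy version with fabricated rows:

header = ['ilx', 'label']
data = [('ilx_0000001', 'brain'), ('ilx_0000002', 'cerebellum')]  # made-up fragments
cdata = list(zip(*data))  # one tuple per column

def datal(head):
    return cdata[header.index(head)]

assert datal('label') == ('brain', 'cerebellum')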