Пример #1
0
def flag_dep(json_):
    """Highlight deprecated graph entries in red, mutating *json_* in place.

    Every node whose ``'meta'`` contains the module-level ``DEP`` key has
    its id and label wrapped with ``tc.red``; any edge endpoint equal to
    that node's original id is recolored the same way.
    """
    deprecated_nodes = (n for n in json_['nodes'] if DEP in n['meta'])
    for node in deprecated_nodes:
        curie, label = node['id'], node['lbl']
        node['id'] = tc.red(curie)
        node['lbl'] = tc.red(label)
        # compare against the original (uncolored) curie captured above
        for edge in json_['edges']:
            if edge['sub'] == curie:
                edge['sub'] = tc.red(curie)
            elif edge['obj'] == curie:
                edge['obj'] = tc.red(curie)
Пример #2
0
def flag_dep(json_):
    """Highlight deprecated entries of a SciGraph-style graph in place.

    ``json_`` is a dict with ``'nodes'`` and ``'edges'`` lists.  For
    every node whose ``'meta'`` contains ``DEP`` (a module-level key),
    the node's id and label -- and any edge endpoint equal to the
    node's id -- are wrapped in ``tc.red`` for terminal display.
    """
    for node in json_['nodes']:
        if DEP in node['meta']:
            curie = node['id']
            label = node['lbl']
            node['id'] = tc.red(curie)
            node['lbl'] = tc.red(label)
            # recolor matching edge endpoints; comparisons use the
            # original (uncolored) curie captured above
            for edge in json_['edges']:
                if edge['sub'] == curie:
                    edge['sub'] = tc.red(curie)
                elif edge['obj'] == curie:
                    edge['obj'] = tc.red(curie)
Пример #3
0
def spell(filenames, debug=False):
    """Spell-check *filenames* in parallel, printing misses in red.

    Files are tokenized by ``get_spells`` (fanned out over 9 joblib
    workers); each token is checked against the hunspell en_US
    dictionary, misspellings are collected, and a highlighted version
    of any file containing a miss is printed.  With ``debug=True`` the
    collected misses are printed and a debugger is entered.

    Raises ImportError when the hunspell bindings are not installed.
    """
    if hunspell is None:
        raise ImportError('hunspell is not installed on your system. If you want '
                          'to run `ontutils spell` please run pipenv install --dev --skip-lock. '
                          'You will need the development libs for hunspell on your system.')
    spell_objects = (u for r in Parallel(n_jobs=9)(delayed(get_spells)(f) for f in filenames) for u in r)
    hobj = hunspell.HunSpell('/usr/share/hunspell/en_US.dic', '/usr/share/hunspell/en_US.aff')
    #nobj = hunspell.HunSpell(os.path.expanduser('~/git/domain_wordlists/neuroscience-en.dic'), '/usr/share/hunspell/en_US.aff')  # segfaults without aff :x
    collect = set()
    for filename, s, p, o in spell_objects:
        missed = False
        no = []
        for line in o.split('\n'):
            nline = []
            for tok in line.split(' '):
                # tokstrip separates leading/trailing punctuation so only
                # the bare token is spell-checked
                prefix, tok, suffix = tokstrip(tok)
                if not hobj.spell(tok):# and not nobj.spell(tok):
                    missed = True
                    collect.add(tok)
                    nline.append(prefix + tc.red(tok) + suffix)
                else:
                    nline.append(prefix + tok + suffix)
            line = ' '.join(nline)
            no.append(line)
        o = '\n'.join(no)
        if missed:
            #print(filename, s, o)
            print('>>>', o)

    if debug:
        # plain loop: a list comprehension should not be used for side
        # effects (it builds and discards a list of Nones)
        for tok in sorted(collect):
            print(tok)
        breakpoint()
Пример #4
0
    def _ontology_local_repo(self):
        """Locate the local ontology repository.

        Resolution order:
        1. the path named in ``self.config['ontology_local_repo']``,
        2. ``self._maybe_repo``,
        3. ``self.ontology_repo`` under any parent directory of this
           file, walking up toward the filesystem root,
        falling back to a path guaranteed not to exist.
        """
        try:
            stated_repo = Path(self.config['ontology_local_repo'])
        except (KeyError, TypeError, FileNotFoundError) as e:
            # config missing/not-a-mapping/unreadable: use a sentinel
            # path that can never exist so the exists() check fails
            stated_repo = Path('/dev/null/does-not-exist')

        maybe_repo = self._maybe_repo
        if stated_repo.exists():
            return stated_repo
        elif maybe_repo.exists():
            return maybe_repo
        else:
            maybe_start = Path(__file__).parent.parent.parent.absolute()
            maybe_base = maybe_start
            fsroot = Path('/')
            # climb the directory tree looking for the repo
            while maybe_base != fsroot:
                maybe_repo = maybe_base / self.ontology_repo
                if maybe_repo.exists():
                    log.info(
                        tc.blue('INFO:') +
                        f'Ontology repository found at {maybe_repo}')
                    return maybe_repo
                else:
                    maybe_base = maybe_base.parent
            else:
                # while/else: runs only when the loop exhausts without a
                # return, i.e. nothing was found anywhere up the tree
                log.warning(
                    tc.red('WARNING:') +
                    f'No repository found in any parent directory of {maybe_start}'
                )

        return Path('/dev/null/does-not-exist')  # sentinel: seems reasonable ...
Пример #5
0
 def parents(self):
     """Yield the parent annotations referenced by this annotation.

     When a referenced parent has been deleted (the id lookup raises
     KeyError), mark this annotation as orphaned and warn; an already
     orphaned annotation yields nothing.
     """
     if self.orphaned:
         return
     for aid in self.anno.references:
         try:
             yield self._getAATById(aid)
         except KeyError:
             self.orphaned = True
             print(tc.red('WARNING:'), f'parent annotation was deleted from {self.id}')
Пример #6
0
        def fetch_and_save(url, loc):
            """Download *url* and write its bytes under *loc* (relative to *path*)."""
            response = requests.get(url)
            target = (path / loc).as_posix()
            if not response.ok:
                print(tc.red('WARNING:'), f'failed to fetch {url}')
                return

            with open(target, 'wb') as f:
                f.write(response.content)

            print(tc.blue('INFO:'), f'{url:<60} written to {loc}')
Пример #7
0
 def sv(asdf, start, ind, warn=False):
     """Shorten and wrap a value for printing.

     String values that look like IRIs (start with ``http``) are
     shortened via the ``scigPrint.shorten`` iri->prefix map before
     wrapping; everything else (including IRIs with no known prefix)
     is wrapped as its repr.  *warn* is accepted for interface
     compatibility but currently unused.
     """
     # isinstance instead of type() membership: also excludes bool/int
     # subclasses, which equally lack .startswith
     if not isinstance(asdf, (bool, int)) and asdf.startswith('http'):
         for iri, short in scigPrint.shorten.items():
             if iri in asdf:
                 return scigPrint.wrap(asdf.replace(iri, short + ':'),
                                       start, ind)
         print(tc.red('WARNING:'), 'Shorten failed for', tc.ltyellow(asdf))
         return scigPrint.wrap(repr(asdf), start, ind)
     else:
         return scigPrint.wrap(repr(asdf), start, ind)
Пример #8
0
def import_tree(graph, ontologies, **kwargs):
    """Yield ``(tree, extras)`` owl:imports trees rooted at each ontology.

    An obograph json blob is rebuilt from *graph* for every ontology;
    files that cannot be found in the import chain are reported and
    skipped.
    """
    for ontology in ontologies:
        basename = Path(ontology).name
        print(basename)
        OntCuries.populate(graph)
        obograph = graph.asOboGraph('owl:imports', restriction=False)
        try:
            tree, extras = creatTree(
                *Query(f'NIFTTL:{basename}', 'owl:imports', 'OUTGOING', 30),
                json=obograph,
                prefixes=dict(graph.namespace_manager),
                **kwargs)
            yield tree, extras
        except KeyError:
            print(tc.red('WARNING:'), 'could not find', ontology, 'in import chain')  # TODO zap onts w/o imports
Пример #9
0
    def build_transgenic_lines(self):
        """Build and write the allen-transgenic-lines ontology.

        For every transgenic line attached to a donor in
        ``self.neuron_data`` a class ``prefix:stock_number`` (falling
        back to the line id) is minted with:

        - rdfs:subClassOf ilxtr:transgenicLine
        - rdfs:label from the line name
        - definition from the line description
        - ilxtr:hasTransgenicType from the line type (driver lines
          containing CreERT2 become inducibleDriver)

        Lines with an unknown source prefix are reported and skipped;
        the AIBS prefix is rewritten to AllenTL.
        """
        triples = []
        for cell_specimen in self.neuron_data:
            for line in cell_specimen['donor']['transgenic_lines']:
                _id = line['stock_number'] or line['id']
                prefix = line['transgenic_line_source_name']
                line_type = line['transgenic_line_type_name']
                if line_type == 'driver' and 'CreERT2' in line['name']:
                    line_type = 'inducibleDriver'

                if prefix not in ('JAX', 'MMRRC', 'AIBS'):
                    print(tc.red('WARNING:'), 'unknown prefix', prefix,
                          json.dumps(line, indent=4))
                    continue

                if prefix == 'AIBS':
                    prefix = 'AllenTL'

                _class = self.ns[prefix][str(_id)]
                triples.extend((
                    (_class, rdf.type, owl.Class),
                    (_class, rdfs.label, rdflib.Literal(line['name'])),
                    (_class, definition, rdflib.Literal(line['description'])),
                    (_class, rdfs.subClassOf, ilxtr.transgenicLine),
                    (_class, ilxtr.hasTransgenicType, ilxtr[line_type + 'Line']),
                ))

        # TODO aspects.ttl?
        transgenic_lines = simpleOnt(
            filename='allen-transgenic-lines',
            local_base=graphBase.local_base,
            path='ttl/generated/',
            prefixes=self.prefixes,
            triples=triples,
            comment='Allen transgenic lines for cell types',
            branch=self.branch,
            calling__file__=__file__,
        )

        transgenic_lines._graph.write()
Пример #10
0
def spell(filenames, debug=False):
    """Spell-check *filenames* against the hunspell en_US dictionary.

    Known non-word patterns (module-level ``_bads``) are blanked out of
    each line before tokenizing on spaces; misspelled tokens are
    collected and a red-highlighted rendition of any file containing a
    miss is printed.  With ``debug=True`` the collected misses are
    printed and a debugger is entered.

    Raises ImportError when the hunspell bindings are not installed.
    """
    if hunspell is None:
        raise ImportError(
            'hunspell is not installed on your system. If you want '
            'to run `ontutils spell` please run pipenv install --dev --skip-lock. '
            'You will need the development libs for hunspell on your system.')
    hobj = hunspell.HunSpell('/usr/share/hunspell/en_US.dic',
                             '/usr/share/hunspell/en_US.aff')
    #nobj = hunspell.HunSpell(os.path.expanduser('~/git/domain_wordlists/neuroscience-en.dic'), '/usr/share/hunspell/en_US.aff')  # segfaults without aff :x
    collect = set()
    for filename in filenames:
        missed = False
        no = []
        with open(filename, 'rt') as f:
            # iterate the file directly instead of materializing
            # readlines(); same order, lower memory
            for line_ in f:
                line = line_.rstrip()
                nline = []
                # blank out known-bad patterns (same width, so token
                # positions are preserved) before splitting on spaces
                for pattern in _bads:
                    line = line.replace(pattern, ' ' * len(pattern))

                for tok in line.split(' '):
                    prefix, tok, suffix = tokstrip(tok)
                    if not hobj.spell(tok):  # and not nobj.spell(tok):
                        missed = True
                        collect.add(tok)
                        nline.append(prefix + tc.red(tok) + suffix)
                    else:
                        nline.append(prefix + tok + suffix)
                line = ' '.join(nline)
                no.append(line)

        o = '\n'.join(no)
        if missed:
            print('>>>', o)

    if debug:
        # plain loop: a list comprehension should not be used for side
        # effects
        for tok in sorted(collect):
            print(tok)
        breakpoint()
Пример #11
0
def import_tree(graph, ontologies, **kwargs):
    """Yield ``(tree, extras)`` owl:imports trees for each ontology.

    A SciGraph-style json blob is rebuilt from *graph* (via makeGraph)
    for every ontology; files that cannot be found in the import chain
    are reported and skipped.
    """
    for ontology in ontologies:
        basename = Path(ontology).name
        print(basename)
        mg = makeGraph('', graph=graph)
        mg.add_known_namespaces('owl', 'obo', 'dc', 'dcterms', 'dctypes',
                                'skos', 'NIFTTL')
        sgjson = mg.make_scigraph_json('owl:imports', direct=True)
        try:
            tree, extras = creatTree(
                *Query(f'NIFTTL:{basename}', 'owl:imports', 'OUTGOING', 30),
                json=sgjson,
                prefixes=mg.namespaces,
                **kwargs)
            yield tree, extras
        except KeyError:
            print(tc.red('WARNING:'), 'could not find', ontology,
                  'in import chain')  # TODO zap onts w/o imports
Пример #12
0
def print_trees(graph, bridge):
    """Build and print several NIFGA/UBERON part-of trees for comparison.

    Queries proper_part_of / has_proper_part hierarchies rooted at
    birnlex_796 (brain) from the deprecated graph, the live SciGraph
    service (module-level ``sgg``), and the UBERON *bridge* graph, then
    merges the live tree with inverted proper_part_of edges to get a
    both-directions view.  Returns all five ``(tree, extras)`` pairs.
    """
    PPO = 'ro:proper_part_of'
    HPP = 'ro:has_proper_part'
    # expand the curies to full iris using the graph's namespace table
    hpp = HPP.replace('ro:', graph.namespaces['ro'])
    ppo = PPO.replace('ro:', graph.namespaces['ro'])
    a, b = creatTree(
        *Query(
            tc.red('birnlex_796'), HPP, 'OUTGOING', 10
        ),  # FIXME seems to be a last one wins bug here with birnlex_796 vs NIFGA:birnlex_796 depending on the has seed...
        json=graph.make_scigraph_json(HPP))
    c, d = creatTree(*Query('NIFGA:birnlex_796', hpp, 'OUTGOING', 10),
                     graph=sgg)
    j = bridge.make_scigraph_json(
        HPP)  # issue https://github.com/RDFLib/rdflib/pull/661
    e, f = creatTree(*Query('UBERON:0000955', HPP, 'OUTGOING', 10), json=j)
    k_, l_ = creatTree(*Query('NIFGA:nlx_anat_101177', ppo, 'INCOMING', 10),
                       graph=sgg)

    merge = dict(d[-1])  # full tree with ppo converted to hpp
    merge['nodes'].extend(l_[-1]['nodes'])
    # invert the ppo edges into hpp edges by swapping sub and obj
    # (the comprehension's `e` is scoped to the comprehension and does
    # not clobber the UBERON tree `e` above)
    merge['edges'].extend([{
        'sub': e['obj'],
        'pred': hpp,
        'obj': e['sub']
    } for e in l_[-1]['edges']])
    m_, n_ = creatTree(*Query('NIFGA:birnlex_796', hpp, 'OUTGOING', 10),
                       json=merge)

    print('nifga dep')
    print(a)
    print('nifga live')
    print(c)
    print('new bridge')
    print(e)
    print('nifga total (both directions)')
    print(m_)

    print('nifga white matter')
    print(k_)

    return a, b, c, d, e, f, k_, l_, m_, n_
Пример #13
0
def main():
    """Entry point for the ontutils CLI.

    Parses ``__doc__`` with docopt and dispatches to the subcommand
    implementations: set, devconfig, catalog-extras, version-iri,
    scigraph-stress, deadlinks, spell, iri-commit, uri-switch,
    backend-refactor, todo, expand.
    """
    from docopt import docopt, parse_defaults
    args = docopt(__doc__, version='ontutils 0.0.1')
    defaults = {
        o.name: o.value if o.argcount else None
        for o in parse_defaults(__doc__)
    }

    verbose = args['--verbose']
    debug = args['--debug']

    repo_name = args['<repo>']

    git_local = os.path.expanduser(args['--git-local'])

    epoch = args['--epoch']

    curies_location = args['--curies']
    curies = getCuries(curies_location)
    curie_prefixes = set(curies.values())

    filenames = args['<file>']
    filenames.sort(key=lambda f: os.path.getsize(f),
                   reverse=True)  # make sure the big boys go first
    refactor_skip = ('nif.ttl', 'resources.ttl', 'generated/chebislim.ttl',
                     'unused/ro_bfo_bridge.ttl', 'generated/ncbigeneslim.ttl',
                     'generated/NIF-NIFSTD-mapping.ttl')
    rfilenames = [f for f in filenames if f not in refactor_skip]

    if args['set']:
        from pyontutils.config import auth
        uc = auth.user_config

        def set_uc(var, value):
            # set a single auth-variable in the user config; refuses to
            # touch a config that contains comments since dumping the
            # parsed blob would erase them
            with open(uc._path, 'rt') as f:
                text = f.read()

            if '#' in text:
                msg = f'Comments detected! Not writing config! {uc._path}'
                raise ValueError(msg)

            blob = uc.load()
            # XXX NEVER DUMP A CONFIG THIS YOU _WILL_ KLOBBER IT
            # BY ACCIDENT AT SOME POINT AND WILL ERASE ANY/ALL COMMENTS
            # THERE IS NO SAFETY WITH THIS IMPLEMENTATION
            # USERS SHOULD EDIT THEIR CONFIGS DIRECTLY
            # except that it makes giving instructions for
            # setting values a bit more complicated
            blob['auth-variables'][var] = value
            uc.dump(blob)

        if args['ontology-local-repo']:
            var = 'ontology-local-repo'
            olr = Path(args['<path>']).expanduser().resolve()
            olr_string = olr.as_posix()
            set_uc(var, olr_string)
            value2 = auth.get_path(var)
            if not value2.exists():
                msg = f'{var} path does not exist! {value2}'
                print(tc.red('WARNING'), msg)

            msg = f'{var} path {value2} written to {uc._path}'
            print(msg)
            assert olr == value2

        elif args['scigraph-api-key']:
            # FIXME this is a hack on top of orthauth, which will not
            # be implementing programmtic modification of user config
            # files any time soon, though it might make sense to have a
            # "machine config path" in addition to auth and user config
            # check the secrets path first to make sure it is ok
            path = ['scigraph', 'api', 'key']
            spath = auth._pathit(uc.get_blob('auth-stores', 'secrets')['path'])
            if not spath.parent.exists():
                spath.parent.mkdir(parents=True)
                spath.parent.chmod(0o0700)
            if spath.suffix != '.yaml':
                msg = f"Can't write secrets file of type {spath.suffix}"
                args = None
                raise NotImplementedError(msg)

            v = None
            try:
                s = uc.secrets
                v = s(*path)
            except Exception:
                # narrow from a bare except: the lookup failing for any
                # ordinary reason just means the key is not set yet, but
                # KeyboardInterrupt/SystemExit must not be swallowed
                pass

            if v is not None:
                v = None
                raise ValueError(f'Path already in secrets! {path} in {spath}')

            # safely append to the secrets file
            key = args['<key>']
            path_key = f'\nscigraph:\n  api:\n    key: {key}'
            if not spath.exists():
                spath.touch()
                spath.chmod(0o0600)

            with open(spath, 'a+') as f:
                f.write(path_key)

            # set the config var
            var = 'scigraph-api-key'
            value = {'path': ' '.join(path)}
            set_uc(var, value)  # set the path
            # XXX NOTE yes, it is correct to do this only after secrets succeeds
            # otherwise it is possible to get into a state where secrets does
            # not exist but there is a path pointing to it, so load this
            # ontutils file will fail during import time

            # test that we got the value we expected
            value2 = auth.get(var)
            msg = (f'Key written to secrets. {spath} and path to '
                   f'key was written to config {uc._path}')
            print(msg)
            assert key == value2, 'Key retrieved does not match key set!'

    elif args['devconfig']:
        if args['--write']:
            file = devconfig.write(args['--output-file'])
            print(f'config written to {file}')
        elif args['<field>']:
            for f in args['<field>']:
                print(getattr(devconfig, f, ''))
        else:
            print(devconfig)
    elif args['catalog-extras']:
        catalog_extras(args['--fetch'])
    elif args['version-iri']:
        version_iris(*filenames, epoch=epoch)
    elif args['scigraph-stress']:
        scigraph_stress(int(args['--rate']), int(args['--timeout']), verbose,
                        debug)
    elif args['deadlinks']:
        deadlinks(filenames, int(args['--rate']), int(args['--timeout']),
                  verbose, debug)
    elif args['spell']:
        spell(filenames, debug)
    elif args['iri-commit']:
        make_git_commit_command(git_local, repo_name)
    elif args['uri-switch']:
        uri_switch(rfilenames, uri_switch_values)
    elif args['backend-refactor']:
        backend_refactor(rfilenames, backend_refactor_values)
    elif args['todo']:
        graph = loadall(git_local, repo_name, local=True)
        graph_todo(graph, curie_prefixes, uri_switch_values)
        breakpoint()
    elif args['expand']:
        curies['NLXWIKI'] = 'http://legacy.neurolex.org/wiki/'
        for curie in args['<curie>']:
            prefix, suffix = curie.split(':')
            print(curies[prefix] + suffix)
Пример #14
0
    def equiv(curie, label):
        """Find a replacement (UBERON-ward) equivalent class for *curie*.

        Consults the enclosing-scope ``manual`` mapping first, then
        SciGraph equivalentClass neighbors, then deprecation /
        isReplacedBy metadata, then a term search on *label*.  Results
        are accumulated into the enclosing-scope dicts ``replaced_by``,
        ``internal_equivs``, ``irbcs`` and ``exact``.  Returns the list
        of equivalent nodes (or the manual replacement when one exists).
        """
        if curie in manual:
            replaced_by[curie] = manual[curie]
            return manual[curie]

        ec = sgg.getNeighbors(curie, relationshipType='equivalentClass')
        # drop the query node itself from the neighbor set
        nodes = [n for n in ec['nodes'] if n['id'] != curie]
        if len(nodes) > 1:
            #print('wtf node', [n['id'] for n in nodes], curie)
            for node in nodes:
                id_ = node['id']
                label_ = node['lbl']

                if id_.startswith('UBERON'):
                    if curie in replaced_by:
                        # more than one UBERON equivalent: keep both as a tuple
                        one = replaced_by[curie]
                        replaced_by[curie] = one, id_
                        print('WE GOT DUPES', curie, label, one, id_)  # TODO
                    else:
                        replaced_by[curie] = id_
                else:
                    internal_equivs[curie] = id_
        elif not nodes:
            # no equivalent classes at all: fall back to deprecation metadata
            node = sgg.getNode(curie)['nodes'][0]
            if OWL.deprecated.toPython() in node['meta']:
                print('THIS CLASS IS DEPRECATED', curie)
                lbl = node['lbl']
                if lbl.startswith(
                        'Predominantly white regional') or lbl.startswith(
                            'Predominantly gray regional'):
                    print('\tHE\'S DEAD JIM!', lbl, node['id'])
                    replaced_by[curie] = 'NOREP'
                if IRBC in node['meta']:
                    # the class already names its own replacement; follow it
                    existing_replaced = node['meta'][IRBC][0]
                    ec2 = sgg.getNeighbors(existing_replaced,
                                           relationshipType='equivalentClass')
                    print('\tFOUND ONE', existing_replaced)
                    #scigPrint.pprint_node(sgg.getNode(existing_replaced))
                    if ec2['edges']:  # pass the buck if we can
                        print('\t', end='')
                        scigPrint.pprint_edge(ec2['edges'][0])
                        rb = ec2['edges'][0]['obj']
                        print('\tPASSING BUCK : (%s -> %s -> %s)' %
                              (curie, existing_replaced, rb))
                        irbcs[curie] = (existing_replaced, rb)
                        replaced_by[curie] = rb
                        return nodes
                    else:
                        er_node = sgv.findById(existing_replaced)
                        if not er_node['deprecated']:
                            if not er_node['curie'].startswith('NIFGA:'):
                                print('\tPASSING BUCK : (%s -> %s)' %
                                      (curie, er_node['curie']))
                                return nodes

                        print(
                            '\tERROR: could not pass buck, we are at a dead end at',
                            er_node)  # TODO
                    print()

            # last resort: search by label among UBERON terms
            moar = [
                t for t in sgv.findByTerm(label)
                if t['curie'].startswith('UBERON')
            ]
            if moar:
                #print(moar)
                #replaced_by[curie] = moar[0]['curie']
                if len(moar) > 1:
                    print('WARNING', curie, label,
                          [(m['curie'], m['labels'][0]) for m in moar])

                # NOTE(review): when several matches exist, last one wins
                # in replaced_by -- confirm intended
                for node in moar:
                    #if node['curie'] in uberon_obsolete:  # node['deprecated']?
                    #continue
                    ns = sgg.getNode(node['curie'])
                    assert len(
                        ns['nodes']) == 1, "WTF IS GOING ON %s" % node['curie']
                    ns = ns['nodes'][0]
                    if _doprint:
                        print(
                            'Found putative replacement in moar: (%s -> %s)' %
                            (curie, ns['id']))
                        if DBX in ns['meta']:
                            print(' ' * 8, node['curie'], ns['meta'][DBX],
                                  node['labels'][0], node['synonyms'])

                        if AID in ns['meta']:
                            print(' ' * 8, node['curie'], ns['meta'][AID],
                                  node['labels'][0], node['synonyms'])

                        if CON in ns['meta']:
                            print(' ' * 8, node['curie'], ns['meta'][CON],
                                  node['labels'][0], node['synonyms'])

                    replaced_by[curie] = ns['id']
            else:
                replaced_by[curie] = None
                if False:  # review
                    print('NO FORWARD EQUIV', tc.red(curie), label)  # TODO
                    for k, v in sorted(
                            sgg.getNode(curie)['nodes'][0]['meta'].items()):
                        if type(v) == iter:
                            print(' ' * 4, k)
                            for _ in v:
                                print(' ' * 8, _)
                        else:
                            print(' ' * 4, k, v)
        else:
            # exactly one equivalent node: record it as the replacement
            node = nodes[0]
            replaced_by[curie] = node['id']
            exact[curie] = node['id']

        return nodes
Пример #15
0
def main():
    """Export parcellation label tables (.psv) for several atlases.

    For each generated parcellation ttl file: record the file's last
    git commit hash, index annotation axioms, lift restrictions into
    the graph, then emit one ``label|abbrev|curie|superPart`` row per
    owl:Class (plus per-edition rows when annotation data is present)
    into /tmp, with the commit hash in the output filename.
    """
    for filename in ('mbaslim', 'hbaslim', 'paxinos-rat-labels',
                     'waxholm-rat-labels'):
        filepath = gitf / 'NIF-Ontology/ttl/generated/parcellation' / (
            filename + '.ttl')
        dir_ = filepath.parent.as_posix()
        print(dir_)
        # hash of the last commit touching this file, used in the
        # output filenames below
        file_commit = subprocess.check_output(
            [
                'git', 'log', '-n', '1', '--pretty=format:%H', '--',
                filepath.name
            ],
            cwd=dir_,
            stderr=subprocess.DEVNULL).decode().rstrip()
        graph = rdflib.Graph().parse(filepath.as_posix(), format='ttl')
        g = makeGraph('', graph=graph)

        # index annotation axioms both ways:
        # (pred, obj) -> set of annotated triples, and
        # triple -> set of (pred, obj)
        annos = defaultdict(set)
        anno_trips = defaultdict(set)
        for triple, predicate_objects in annotation.parse(graph=graph):
            for a_p, a_o in predicate_objects:
                annos[a_p, a_o].add(triple)
                anno_trips[triple].add((a_p, a_o))

        # freeze to a plain dict so missing triples raise KeyError
        # instead of silently growing the defaultdict
        anno_trips = {k: v for k, v in anno_trips.items()}

        for lifted_triple in restriction.parse(graph=graph):
            graph.add(lifted_triple)

        out_header = 'label|abbrev|curie|superPart curie\n'
        out = []
        editions_header = 'edition|label|abbrev|curie\n'
        editions = []
        for s in graph.subjects(rdf.type, owl.Class):
            rdfsLabel = next(graph.objects(s, rdfs.label))
            try:
                prefLabel = next(graph.objects(s, skos.prefLabel))
            except StopIteration:
                print(tc.red('WARNING:'),
                      f'skipping {s} {rdfsLabel} since it has no prefLabel')
                continue
            syns = sorted(
                graph.objects(s, NIFRID.synonym)
            )  # TODO are there cases where we need to recaptulate what we are doing for for abbrevs?
            abbrevs = sorted(graph.objects(
                s, NIFRID.abbrev))  # FIXME paxinos has more than one
            try:
                if annos:
                    if len(abbrevs) > 1:
                        print(tc.blue('INFO:'), g.qname(s),
                              repr(prefLabel.value), 'has multiple abbrevs',
                              [a.value for a in abbrevs])
                    # prefer latest
                    # NOTE(review): if abbrevs is empty here (or no
                    # literalUsedBy annotation matches), `abbrev` is
                    # never bound and the f-string below raises
                    # NameError, which the IndexError handler does not
                    # catch -- confirm whether this can happen
                    current_edition = ''
                    for a in abbrevs:
                        for a_p, edition in anno_trips[s, NIFRID.abbrev, a]:
                            if a_p == ilxtr.literalUsedBy:
                                if current_edition < edition:
                                    current_edition = edition
                                    abbrev = a
                else:
                    abbrev = abbrevs[0]
            except IndexError:
                abbrev = ''
            try:
                superPart = next(graph.objects(s, ilxtr.labelPartOf))
            except StopIteration:
                superPart = ''

            out.append(
                f'{prefLabel}|{abbrev}|{g.qname(s)}|{g.qname(superPart)}')

            if annos:
                # per-edition rows: collect curie/label/abbrev keyed by
                # the edition annotation object
                #asdf = {'ed':{'label':,'abbrev':,'curie':}}
                asdf = defaultdict(dict)
                triple = s, skos.prefLabel, prefLabel
                eds = anno_trips[triple]
                for a_p, a_o in eds:
                    asdf[a_o]['curie'] = g.qname(s)
                    asdf[a_o]['label'] = prefLabel
                for syn in graph.objects(s, NIFRID.synonym):
                    triple = s, NIFRID.synonym, syn
                    eds = anno_trips[triple]
                    for a_p, a_o in eds:
                        asdf[a_o]['curie'] = g.qname(s)
                        if 'label' in asdf[a_o]:
                            print(
                                tc.red('WARNING:'),
                                f'{a_o} already has a label "{asdf[a_o]["label"]}" for "{syn}"'
                            )
                        asdf[a_o]['label'] = syn
                for abbrev in graph.objects(s, NIFRID.abbrev):
                    triple = s, NIFRID.abbrev, abbrev
                    eds = anno_trips[triple]
                    #print('aaaaaaaaaaa', g.qname(s), )
                    for a_p, a_o in eds:
                        asdf[a_o]['curie'] = g.qname(s)
                        if 'abbrev' in asdf[a_o]:
                            print(
                                tc.red('WARNING:'),
                                f'{a_o} already has a abbrev "{asdf[a_o]["abbrev"]}" for "{abbrev}"'
                            )
                        asdf[a_o]['abbrev'] = abbrev

                #print(asdf)
                for ed, kwargs in sorted(asdf.items()):
                    if 'abbrev' not in kwargs:
                        print('Skipping', ed, 'for\n', kwargs)
                        continue
                    editions.append('{ed}|{label}|{abbrev}|{curie}'.format(
                        ed=g.qname(ed), **kwargs))

        with open('/tmp/' + filename + f'-{file_commit[:8]}.psv', 'wt') as f:
            f.write(out_header + '\n'.join(sorted(out, key=labelkey)))
        if editions:
            with open('/tmp/' + filename + f'-editions-{file_commit[:8]}.psv',
                      'wt') as f:
                f.write(editions_header +
                        '\n'.join(sorted(editions, key=edkey)))
Пример #16
0
def printer(*args, **kwargs):
    """Forward to ``printe`` with each argument's repr wrapped in red."""
    colored = [tc.red(repr(arg)) for arg in args]
    printe(*colored, **kwargs)
Пример #17
0
def would_you_like_to_know_more_question_mark():
    """One-off reconciliation report between NIF-Ontology URIs and InterLex.

    Cross-references neuinfo.org identifiers against SciGraph (``sgg``/``sgv``),
    the resources.ttl file, and InterLex label/iri dumps (``datal``) to find
    duplicate external ids and candidate label-based mappings, then builds
    pipe-ready report strings (``mmr``, ``mmr_1_to_1_text``, ``no_uniprot_text``).

    NOTE(review): relies heavily on module-level state defined elsewhere in
    this file (``h_uris``, ``resolver_not_ilx_only``, ``sgg``, ``sgv``,
    ``datal``, ``gitf``, ``hng``, ``ilxb``, ``_ilx``, ``all_uris``, ``SGG``,
    ``Async``/``deferred``) — confirm those are populated before calling.
    All results are local; nothing is returned or written, so this appears
    to be intended for interactive inspection — TODO confirm.
    """

    # resolving differences between classes
    # annotation properties whose values act as external identifiers / differentia
    more_ids = set((
        'http://uri.neuinfo.org/nif/nifstd/readable/ChEBIid',
        'http://uri.neuinfo.org/nif/nifstd/readable/GOid',
        'http://uri.neuinfo.org/nif/nifstd/readable/MeshUid',
        'http://uri.neuinfo.org/nif/nifstd/readable/PMID',
        'http://uri.neuinfo.org/nif/nifstd/readable/UmlsCui',
        'http://uri.neuinfo.org/nif/nifstd/readable/bamsID',
        'http://uri.neuinfo.org/nif/nifstd/readable/bonfireID',
        'http://uri.neuinfo.org/nif/nifstd/readable/cell_ontology_ID',
        'http://uri.neuinfo.org/nif/nifstd/readable/definingCitationID',
        'http://uri.neuinfo.org/nif/nifstd/readable/definingCitationURI',
        'http://uri.neuinfo.org/nif/nifstd/readable/emapMouseStageDataID',
        'http://uri.neuinfo.org/nif/nifstd/readable/emapMouseStageDiagramID',
        'http://uri.neuinfo.org/nif/nifstd/readable/externalSourceId',
        'http://uri.neuinfo.org/nif/nifstd/readable/externalSourceURI',
        'http://uri.neuinfo.org/nif/nifstd/readable/gbifID',
        'http://uri.neuinfo.org/nif/nifstd/readable/gbifTaxonKeyID',
        'http://uri.neuinfo.org/nif/nifstd/readable/gene_Ontology_ID',
        #'http://uri.neuinfo.org/nif/nifstd/readable/hasExternalSource',
        'http://uri.neuinfo.org/nif/nifstd/readable/hasGenbankAccessionNumber',
        'http://uri.neuinfo.org/nif/nifstd/readable/imsrStandardStrainName',
        'http://uri.neuinfo.org/nif/nifstd/readable/isReplacedByClass',
        'http://uri.neuinfo.org/nif/nifstd/readable/jaxMiceID',
        'http://uri.neuinfo.org/nif/nifstd/readable/ncbiTaxID',
        'http://uri.neuinfo.org/nif/nifstd/readable/neuronamesID',
        'http://uri.neuinfo.org/nif/nifstd/readable/nifID',
        'http://uri.neuinfo.org/nif/nifstd/readable/sao_ID',
        'http://uri.neuinfo.org/nif/nifstd/readable/umls_ID',
        'http://www.geneontology.org/formats/oboInOwl#id',
    ))

    outside = []  # every external-id value seen, to count duplicates below
    eee = {}  # uri -> {qname(prop): [values]} for uris that have any of more_ids
    resolver_not_ilx_only_but_not_in_scigraph = set()  # resources.ttl
    _res = Graph().parse((gitf / 'NIF-Ontology/ttl/resources.ttl').as_posix(), format='turtle')
    reslookup = {uri:[l] for uri, l in _res.subject_objects(rdfs.label)}
    for uri in chain(h_uris, resolver_not_ilx_only):
        if 'uri.neuinfo.org' in uri:
            try:
                # SciGraph node metadata; TypeError when the node is missing
                meta = sgg.getNode(uri.toPython())['nodes'][0]['meta']
                asdf = {hng.qname(k):v for k, v in meta.items() if k in more_ids}
            except TypeError:
                resolver_not_ilx_only_but_not_in_scigraph.add(uri)  # resources.ttl ;)
                if uri in reslookup:  # no differentia
                    asdf = False
                else:
                    asdf = False
                    print('WTF', uri)
            if asdf:
                #print(uri, asdf)
                eee[uri] = asdf
                for l in asdf.values():
                    for e in l:
                        outside.append(e)

    # external-id values shared by more than one uri, and the uris carrying them
    outside_dupes = [v for v, c in Counter(outside).most_common() if c > 1]
    eee_dupes = {k:v for k, v in eee.items() if anyMembers(outside_dupes, *(e for l in v.values() for e in l))}

    #for uri, meta in sorted(eee_dupes.items(), key=lambda a:sorted(a[1].values())):
        #print(uri.toPython(), sorted((e.replace('PMID: ', 'PMID:'), k) for k, l in meta.items() for e in l))


    # attempt to deal with label mappings
    # ilx fragment -> set of existing non-interlex/non-neurolex iris, and its inverse
    iexisting = defaultdict(set)
    iiexisting = {}
    for i, existing in zip(datal('ilx'), datal('iri')):
        #if 'uri.neuinfo.org' in existing:
        if 'interlex.org' not in existing and 'neurolex.org' not in existing:
            iexisting[i].add(URIRef(existing))
            iiexisting[URIRef(existing)] = i
    iexisting = {**iexisting}

    _ilabs = {k:l for k, l in zip(datal('ilx'), datal('label'))}
    def inner(iri):
        # look up SciGraph labels for an iri; [] when the iri is unknown there
        resp = sgv.findById(iri)
        if resp is not None:
            l = resp['labels']
        else:
            l = [] #_ilabs[iiexisting[iri]] + '** already in ilx **']
            #print('trouble?', iri)  # ilx only
        return iri, l

    #labs = {k:v[0] if v else '<--NO-LABEL-->' for k, v in Async()(deferred(inner)(id_) for id_ in chain(h_uris, (e for s in iexisting.values() for e in s)))}
    labs = {k:v[0] if v else '<--NO-LABEL-->' for k, v in Async()(deferred(inner)(id_) for id_ in h_uris)}
    ilabs = {k:l.lower() for k, l in zip(datal('ilx'), datal('label'))}
    iilabs = {v:k for k, v in ilabs.items()}
    assert len(ilabs) == len(iilabs)
    # map uris to ilx fragments by case-insensitive exact label match
    missing_map = {k:iilabs[v.lower()] for k, v in labs.items() if v and v.lower() in iilabs}  # XXX this is not valid

    missing_existing = {i:[m, *iexisting[i]] for m, i in missing_map.items() if i in iexisting}

    missing_equivs = {next(iter(iexisting[i])):i for m, i in missing_map.items() if i in iexisting}

    eid = NIFRID.externalSourceId.toPython()
    ded = owl.deprecated.toPython()
    # SP: -> swissprot vs uniprot
    mmr = []  # all report lines
    proto_mmr_1_to_1 = {}  # uri -> report line, filtered to 1:1 matches below
    arrr = defaultdict(set)  # external-id value -> uris claiming it
    uniprot_iuphar = set()
    for uri, ilx_frag in {**missing_equivs, **missing_map}.items():
        uri = URIRef(uri)
        try:
            meta = sgg.getNode(uri.toPython())['nodes'][0]['meta']
        except TypeError:
            # just ignore these, they are ilx only :/
            meta = {}
        # pick a source marker: expanded uniprot link, deprecation flag, or TODO
        if eid in meta:
            src = meta[eid][0]
            if src.startswith('SP:'):
                src = tc.yellow(src.replace('SP:', 'http://www.uniprot.org/uniprot/'))
            #elif src.startswith('IUPHAR:'):
                #pass
            #else:
                #src = 'TODO'
        elif ded in meta and meta[ded]:
            src = tc.red('ded ')
        else:
            src = 'TODO'
        val = labs[uri] if uri in labs else _ilabs[ilx_frag] + ' **'
        if uri in eee:
            differentia = str(eee[uri])
            for v in eee[uri].values():
                for e in v:
                    arrr[e].add(uri)
                    if 'SP:' in e or 'IUPHAR:' in e:
                        uniprot_iuphar.add(uri)
        else:
            differentia = ''

        if uri in _ilx and uri in all_uris:
            ruri = SGG[hng.qname(uri)]
            ruri = tc.blue(f'{ruri:<60}')
        else:
            ruri = uri
            ruri = f'{ruri:<60}'

        v = ' '.join((f'{val:<60}',
                      src,
                      ruri,
                      ilxb[ilx_frag],
                      differentia))
        mmr.append(v)
        proto_mmr_1_to_1[uri] = v
        src = None

    arrr = {**arrr}
    # external ids claimed by more than one uri are not safe 1:1 mappings
    arrr_not_1_to_1 = {k:v for k, v in arrr.items() if len(v) > 1}
    #arrr_n11_uris = set((u.toPython() for v in arrr_not_1_to_1.values() for u in v))
    arrr_n11_uris = set.union(*arrr_not_1_to_1.values())
    mmr_1_to_1 = {k:v for k, v in proto_mmr_1_to_1.items() if k not in arrr_n11_uris}
    no_uniprot = {k:v for k, v in proto_mmr_1_to_1.items() if k not in uniprot_iuphar}
    arrr_n11_text = '\n'.join(f'{k:<15} {sorted(_.toPython() for _ in v)}' for k, v in arrr_not_1_to_1.items())
    mmr.sort()
    mmr_text = '\n'.join(mmr)

    mmr_1_to_1_text = '\n'.join(sorted(mmr_1_to_1.values()))

    no_uniprot_text = '\n'.join(sorted(no_uniprot.values()))
Example #18
0
    def as_pretty_diff(self,
                       pathmeta,
                       othermeta,
                       pathobject=None,
                       title=None,
                       human=False):
        """Render two metadata objects as a side-by-side ascii diff table.

        Parameters
        ----------
        pathmeta : metadata object whose rows fill the left two columns.
        othermeta : metadata object whose rows fill the right two columns.
        pathobject : optional path-like; when *title* is None its path
            relative to the current working directory becomes the title.
        human : forwarded to ``self.rows`` to control value formatting.

        Returns
        -------
        str
            The rendered ascii table; the title is colored red when the
            two metadata objects have different content.
        """
        if title is None:
            if pathobject is not None:
                title = pathobject.relative_to(pathobject.cwd()).as_posix()
            else:
                title = ''

        # highlight the title only when the content actually differs
        if pathmeta.content_different(othermeta):
            title = tc.red(title)

        def merge(a, b):
            # Align the two key/value row lists so shared keys land on the
            # same output row; keys present on only one side get blank cells.
            keys = [k for k, _ in a]
            okeys = [ok for ok, _ in b]
            # (key, offset-to-match-in-other-list, key); key is None when
            # the other side does not have it
            kind = [(k, okeys.index(k) - i, k) if k in okeys else (None, 0, k)
                    for i, k in enumerate(keys)]
            okind = [(ok, keys.index(ok) - i, ok) if ok in keys else
                     (None, 0, ok) for i, ok in enumerate(okeys)]
            long, short = (kind, okind) if len(kind) > len(okind) else (okind,
                                                                        kind)
            start_align = None
            aligned = []
            offset = 0
            for i, (k, v, av) in enumerate(long):
                i -= offset
                if k is None and start_align is None:
                    aligned.append(av)
                    aligned.append(short[i + v][-1])
                elif k is not None:
                    if v <= 0:
                        start_align = i
                        aligned.append(av)
                    else:
                        # emit the short side's keys that precede this shared
                        # key, then the shared key itself
                        for j in range(v):
                            aligned.append(short[i + j][-1])
                        offset += v
                        aligned.append(av)

                elif k is None:
                    aligned.append(av)
                # FIXME what to do if short has a key that long does not?

            lkv = len(a[0])
            lokv = len(b[0])
            kv = {k: [k, v] for k, v in a}
            okv = {k: [k, v] for k, v in b}
            for k in aligned:
                # blank-fill whichever side lacks this key
                l = kv[k] if k in kv else ([''] * lkv)
                r = okv[k] if k in okv else ([''] * lokv)
                yield l + r

        h = [['Key', 'Value', 'Key Other', 'Value Other']]
        rows = h + list(
            merge(self.rows(pathmeta, human=human),
                  self.rows(othermeta, human=human)))
        try:
            table = AsciiTable(rows, title=title).table
        except TypeError:
            # AsciiTable raises TypeError on non-string-coercible cells;
            # removed a leftover debugging breakpoint() and use a bare
            # raise so the original traceback is preserved intact
            raise

        return table