def get_pmid_from_url(url):
    if anyMembers(url,
                  'www.ncbi.nlm.nih.gov/pubmed/',
                  'europepmc.org/abstract/MED/'):
        # TODO validate the suffix
        _, suffix = url.rsplit('/', 1)
        return 'PMID:' + suffix
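
Every example on this page leans on anyMembers (and occasionally noneMembers) from pyontutils. A minimal sketch consistent with how it is called throughout, assuming membership-test semantics (substring containment when the container is a string):

# sketch only, assuming pyontutils semantics; not the canonical implementation
def anyMembers(container, *members):
    return any(m in container for m in members)

def noneMembers(container, *members):
    return not anyMembers(container, *members)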
Example #2
def main():
    from docopt import docopt
    args = docopt(__doc__, version='ont-catalog 0.0.1')
    dobig = args['--big']
    remote_base = 'http://ontology.neuinfo.org/NIF/ttl/'
    olr = Path(args['--ontology-local-repo'])
    local_base = (olr / 'ttl').as_posix() + '/'

    #list of all nif ontologies
    #onts = [f for f in fs if f.endswith('.ttl') or f.endswith('.owl') and 'NEMO_' not in f]

    repo = Repo(olr)
    repo_path = Path(olr)
    tracked_files = [
        (repo_path / f).as_posix()
        # FIXME missing scicrunch-registry.ttl
        for f in repo.git.ls_files('--', 'ttl/').split('\n')
        if f.endswith('.ttl') or f.endswith('.owl')
    ]

    #_ = [print(f) for f in fs]

    extra_files = []  # TODO pass in via cli?
    mapping = [(remote_base + fragment, fragment)
               for file in tracked_files + extra_files
               for _, fragment in (file.split('/ttl/', 1), )]

    # check for mismatched import and ontology iris
    itrips = local_imports(
        remote_base, local_base, tracked_files, readonly=True,
        dobig=dobig)  # XXX these files are big and slow, run at own peril
    sa = {os.path.basename(o): s for s, p, o in itrips if 'sameAs' in p}

    # FIXME should be able to do this by checking what is tracked by git...
    externals = ('CogPO.owl', 'NEMO_vv2.98.owl', 'cogat_v0.3.owl', 'doid.owl',
                 'ero.owl', 'pato.owl', 'pr.owl', 'ro_bfo1-1_bridge.owl',
                 'uberon.owl')

    for f in tracked_files + extra_files:
        if '/external/' in f and anyMembers(f, *externals):
            basename = os.path.basename(f)
            if basename in sa:
                target = sa[basename]
                if 'external' not in target:
                    mapping.append((target, 'external/' + basename))

    # make a protege catalog file to simplify life
    uriline = '    <uri id="User Entered Import Resolution" name="{ontid}" uri="{filename}"/>'

    xmllines = ['<?xml version="1.0" encoding="UTF-8" standalone="no"?>',
                '<catalog prefer="public" xmlns="urn:oasis:names:tc:entity:xmlns:xml:catalog">',] + \
               [uriline.format(ontid=ont, filename=file) for ont, file in sorted(mapping)] + \
               ['</catalog>']
    xml = '\n'.join(xmllines)
    with open(f'{tempfile.gettempdir()}/nif-catalog-v001.xml', 'wt') as f:
        f.write(xml)

    if args['--debug']:
        breakpoint()
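
The mapping comprehension above is easy to misread; a quick self-contained check of what it produces (paths hypothetical):

# hypothetical paths, just to pin down the shape of `mapping`
remote_base = 'http://ontology.neuinfo.org/NIF/ttl/'
tracked = ['/repo/ttl/nif.ttl', '/repo/ttl/external/uberon.owl']
mapping = [(remote_base + fragment, fragment)
           for file in tracked
           for _, fragment in (file.split('/ttl/', 1),)]
assert mapping == [
    ('http://ontology.neuinfo.org/NIF/ttl/nif.ttl', 'nif.ttl'),
    ('http://ontology.neuinfo.org/NIF/ttl/external/uberon.owl',
     'external/uberon.owl'),
]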
Example #3
def extract(og, ng, curie, mkdir=False):
    graph = og.g
    properties = (owl.AnnotationProperty, owl.DatatypeProperty,
                  owl.ObjectProperty)
    deads = [s for s in graph.subjects(owl.deprecated, rdflib.Literal(True))]
    for s in deads:
        types = set(o for o in graph.objects(s, rdf.type))
        if anyMembers(types, *properties):
            p, o = rdfs.subPropertyOf, owl.DeprecatedProperty
        elif owl.Class in types:
            p, o = rdfs.subClassOf, owl.DeprecatedClass
        else:
            continue  # don't bother with named individuals

        trip = (ng.check_thing(s), ng.check_thing(p), ng.check_thing(o))
        if trip not in og.g:
            ng.g.add(trip)

    # TODO cases where owl:deprecated is not used but sco owl:DeprecatedClass is...

    base_alts = list(graph.subject_objects(oboInOwl.hasAlternativeId))
    for replacedByClass, oldClassString in base_alts:
        if curie + ':' in oldClassString or curie + '_' in oldClassString:
            oldClassString = oldClassString.toPython()
            s = ng.check_thing(oldClassString)
            if s not in deads:
                types = set(o
                            for o in graph.objects(replacedByClass, rdf.type))
                if anyMembers(types, *properties):
                    p, o = rdfs.subPropertyOf, owl.DeprecatedProperty
                elif owl.Class in types:
                    p, o = rdfs.subClassOf, owl.DeprecatedClass
                else:
                    continue  # don't bother with named individuals
                for t in types:  # re-add the original rdf:type triples
                    ng.add_trip(s, rdf.type, t)
                ng.add_trip(s, p, o)
                ng.add_trip(s, owl.deprecated, True)
                ng.add_trip(s, 'replacedBy:', replacedByClass)

    if mkdir:
        parent = Path(ng.filename).parent
        if not parent.exists():
            parent.mkdir(parents=True)

    ng.write()
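
For reference, the deprecation pattern extract emits, written out in bare rdflib (a sketch; assumes rdflib's standard OWL and RDFS namespaces):

import rdflib
from rdflib.namespace import OWL, RDFS

g = rdflib.Graph()
old = rdflib.URIRef('http://example.org/OldClass')  # hypothetical class
g.add((old, OWL.deprecated, rdflib.Literal(True)))
g.add((old, RDFS.subClassOf, OWL.DeprecatedClass))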
Example #4
    def reverse_report():
        ilx = Graph()
        ilx.parse('/tmp/interlex.ttl', format='turtle')
        not_in_ontology = set()
        annotations = set()
        relations = set()
        drugbank = set()
        t3db = set()
        for subject in ilx.subjects(rdf.type, owl.Class):
            ok = False
            for object in ilx.objects(subject, oboInOwl.hasDbXref):
                if anyMembers(object, 'uri.neuinfo.org', 'GO_', 'CHEBI_', 'PR_',
                              'PATO_', 'HP_', 'OBI_', 'DOID_', 'COGPO_', 'CAO_',
                              'UBERON_', 'NCBITaxon_', 'SO_', 'IAO_'):
                    # FIXME do we really import HP?
                    ok = True

                if (subject, rdf.type, owl.AnnotationProperty) in ilx:  # FIXME for troy these need to be cleared up
                    annotations.add(subject)
                elif (subject, rdf.type, owl.ObjectProperty) in ilx:
                    relations.add(subject)
                elif 'drugbank' in object:
                    drugbank.add(subject)
                elif 't3db.org' in object:
                    t3db.add(subject)

            if not ok:
                not_in_ontology.add(subject)


        drugbank = drugbank & not_in_ontology
        t3db = t3db & not_in_ontology
        annotations = annotations & not_in_ontology
        relations = relations & not_in_ontology
        unaccounted = not_in_ontology - drugbank - t3db - annotations - relations
        report = (
            f'Total       {len(not_in_ontology)}\n'
            f'annotations {len(annotations)}\n'
            f'relations   {len(relations)}\n'
            f'drugbank    {len(drugbank)}\n'
            f't3db        {len(t3db)}\n'
            f'unaccounted {len(unaccounted)}\n'
        )
        print(report)
        return (not_in_ontology, drugbank, unaccounted)
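
The accounting at the end of reverse_report is plain set algebra; a tiny worked instance:

not_in_ontology = {'a', 'b', 'c', 'd'}
drugbank = {'a', 'x'} & not_in_ontology          # {'a'}
t3db = {'b'} & not_in_ontology                   # {'b'}
unaccounted = not_in_ontology - drugbank - t3db  # {'c', 'd'}
assert unaccounted == {'c', 'd'}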
Example #5
def uri_normalization(uri):
    """ NOTE: this does NOT produce uris """
    try:
        # strip hypothesis extension prefix
        if uri.startswith('chrome-extension://bjfhmglciegochdpefhhlphglcehbmek/content/web/viewer.html?file='):
            junk, uri = uri.split('=', 1)

        # universal fixes
        no_fragment, *_frag = uri.rsplit('#', 1)
        no_trailing_slash = no_fragment.rstrip('/')  # annoying
        _scheme, no_scheme = no_trailing_slash.split('://', 1)

        # special cases
        if 'frontiersin.org' in no_scheme:
            # og:url on frontiers is incorrect
            no_scheme = no_scheme.replace('article/', 'articles/')
        elif 'fasebj.org' in no_scheme:  # FIXME this one has _all_ the variants :/
            no_scheme = (no_scheme
                         .replace('.abstract', '')
                         .replace('.full', '')
                         .replace('.pdf', '')
            )
        elif no_scheme.endswith('?needAccess=true'):
            no_scheme = no_scheme[:-len('?needAccess=true')]
        elif '?systemMessage' in no_scheme:
            no_scheme, junk = no_scheme.rsplit('?systemMessage', 1)

        # specific fixes
        if anyMembers(no_scheme,
                      'acs.org',
                      'ahajournals.org',
                      'biologicalpsychiatryjournal.com',
                      'ebiomedicine.com',
                      'fasebj.org',
                      'frontiersin.org',
                      'future-science.com',
                      'hindawi.com',
                      'ieee.org',
                      'jclinepi.com',
                      'jpeds.com',
                      'liebertpub.com',
                      'mitpressjournals.org',
                      'molbiolcell.org',
                      'molmetab.com',
                      'neurobiologyofaging.org',
                      'physiology.org',
                      'sagepub.com',
                      'sciencedirect.com',
                      'tandfonline.com',
                      'theriojournal.com',
                      'wiley.com',):
            # NOTE not all the above hit all of these
            # almost all still resolve
            normalized = (no_scheme
                          .replace('/abstract', '')
                          .replace('/abs', '')
                          .replace('/fulltext', '')
                          .replace('/full', '')
                          .replace('/pdf', ''))
        #elif ('sciencedirect.com' in no_scheme):
            #normalized = (no_scheme
                          #.replace('/abs', ''))
        elif ('cell.com' in no_scheme):
            normalized = (no_scheme  # FIXME looks like cell uses /abstract in og:url
                          .replace('/abstract', '/XXX')
                          .replace('/fulltext', '/XXX'))
        elif 'jneurosci.org' in no_scheme:
            # TODO content/early -> resolution_chain(doi)
            normalized = (no_scheme
                          .replace('.short', '')
                          .replace('.long', '')
                          .replace('.full', '')
                          .replace('.pdf', '')
                          # note .full.pdf is a thing
                          )
        elif 'pnas.org' in no_scheme:
            normalized = (no_scheme
                          .replace('.short', '')
                          .replace('.long', '')
                          .replace('.full', ''))
        elif 'mdpi.com' in no_scheme:
            normalized = (no_scheme
                          .replace('/htm', ''))
        elif 'f1000research.com' in no_scheme:
            # you should be ashamed of yourselves for being in here for this reason
            normalized, *maybe_version = no_scheme.rsplit('/v', 1)
        elif 'academic.oup.com' in no_scheme:
            normalized, *maybesr = no_scheme.rsplit('?searchresult=', 1)
            _normalized, maybe_junk = normalized.rsplit('/', 1)
            numbers = '0123456789'
            if (maybe_junk[0] not in numbers or  # various ways to detect the human readable junk after the id
                maybe_junk[-1] not in numbers or
                '-' in maybe_junk or
                len(maybe_junk) > 20):
                normalized = _normalized
        elif anyMembers(no_scheme,
                        'jci.org',
                        'nature.com'):
            # cases where safe to remove query fragment
            normalized, *_query = no_scheme.rsplit('?', 1)
            normalized, *table_number = normalized.rsplit('/tables/', 1)
        elif 'pubmed/?term=' in no_scheme and noneMembers(no_scheme, ' ', '+'):
            normalized = no_scheme.replace('?term=', '')
        elif 'nih.gov/pubmed/?' in no_scheme:
            # FIXME scibot vs client norm?
            normalized = no_scheme.replace(' ', '+')
        elif 'govhttp' in no_scheme:
            # lol oh dear
            hrm, oops = no_scheme.split('govhttp')
            ded, wat = oops.split('//', 1)
            blargh, suffix = wat.split('/', 1)
            normalized = hrm + 'gov/pmc/' + suffix
        elif 'table/undtbl' in no_scheme:
            normalized, table_number = no_scheme.rsplit('table/undtbl', 1)
        elif anyMembers(no_scheme,
                        'index.php?',
                       ):
            # cases where we just use hypothes.is normalization
            _scheme, normalized = uri_normalize(uri).split('://')  # FIXME h dependency
        else:
            normalized = no_scheme

        # examples of variants encountered in the wild:
        # 'onlinelibrary.wiley.com/doi/10.1002/cne.23727?wol1URL=/doi/10.1002/cne.23727&regionCode=US-CA&identityKey=e2523300-b934-48c9-b08e-940de05d7335'
        # 'www.jove.com/video/55441/?language=Japanese'
        # 'www.nature.com/neuro/journal/v19/n5/full/nn.4282.html'
        # 'www.nature.com/cr/journal/vaop/ncurrent/full/cr201669a.html'
        # 'https://www.nature.com/articles/cr201669'

        #{'www.ingentaconnect.com/content/umrsmas/bullmar/2017/00000093/00000002/art00006':
         #[OntId('DOI:10.5343/bms.2016.1044'), OntId('DOI:info:doi/10.5343/bms.2016.1044')]}

        # pmid extract from pmc
        #<meta name="citation_pmid" content="28955177">
        return normalized


    except ValueError as e:  # split fail
        pdf_prefix = 'urn:x-pdf:'
        if uri.startswith(pdf_prefix):
            return uri
        elif uri in bad_uris:
            print('AAAAAAAAAAAAAAAAAAAAAAAAAAA', uri)
            return 'THIS URI IS GARBAGE AND THIS IS ITS NORMALIZED FORM'
        else:
            raise TypeError(uri) from e
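
Tracing two hypothetical inputs through the rules above by hand (not output from a real run):

# uri_normalization('https://www.frontiersin.org/article/10.3389/fnins.2015.00001/full')
#   -> 'www.frontiersin.org/articles/10.3389/fnins.2015.00001'
# uri_normalization('https://www.ncbi.nlm.nih.gov/pubmed/?term=12345678')
#   -> 'www.ncbi.nlm.nih.gov/pubmed/12345678'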
Example #6
# check for mismatched import and ontology iris
itrips = local_imports(remote_base,
                       local_base,
                       onts,
                       readonly=True,
                       dobig=True)
sa = {os.path.basename(o): s for s, p, o in itrips if 'sameAs' in p}

# FIXME should be able to do this by checking what is tracked by git...
externals = ('CogPO.owl', 'NEMO_vv2.98.owl', 'cogat_v0.3.owl', 'doid.owl',
             'ero.owl', 'pato.owl', 'pr.owl', 'ro_bfo1-1_bridge.owl',
             'uberon.owl')

for f in fs:
    if '/external/' in f and anyMembers(f, *externals):
        basename = os.path.basename(f)
        if basename in sa:
            target = sa[basename]
            if 'external' not in target:
                mapping.append((target, 'external/' + basename))

# make a protege catalog file to simplify life
uriline = '    <uri id="User Entered Import Resolution" name="{ontid}" uri="{filename}"/>'

xmllines = ['<?xml version="1.0" encoding="UTF-8" standalone="no"?>',
            '<catalog prefer="public" xmlns="urn:oasis:names:tc:entity:xmlns:xml:catalog">',] + \
           [uriline.format(ontid=ont, filename=file) for ont, file in sorted(mapping)] + \
           ['</catalog>']
xml = '\n'.join(xmllines)
with open('/tmp/nif-catalog-v001.xml', 'wt') as f:
    f.write(xml)
Example #7
    def already_released_or_skipped(self):
        return any(anyMembers(r.tags, self.REPLY_TAG) for r in self.replies)
Example #8
def swapUriSwitch(trip, ureps, fragment_prefixes):
    for spo in trip:
        if not isinstance(spo, rdflib.URIRef):
            yield spo, None, None
            continue
        elif spo in ureps:
            new_spo = ureps[spo]
            rep = (new_spo, owl.sameAs, spo)
            if 'nlx_' in new_spo:
                pref = 'nlx_'
            elif '/readable/' in new_spo:
                pref = 'NIFRID'
            else:
                pref = 'NIFSTD'
            yield new_spo, rep, pref
            continue
        elif anyMembers(
                spo,  # backend refactor
                'BIRNLex_annotation_properties.owl#',
                'OBO_annotation_properties.owl#'):
            _, suffix = spo.rsplit('#', 1)
            new_spo = rdflib.URIRef(
                os.path.join(NIFSTDBASE, 'readable', suffix))
            rep = (new_spo, owl.sameAs, spo)
            pref = 'NIFRID'
            yield new_spo, rep, pref
            continue

        try:
            uri_pref, fragment = spo.rsplit('#', 1)
            if '_' in fragment:
                frag_pref, p_suffix = fragment.split('_', 1)
                if not p_suffix[0].isdigit():
                    p, suffix = p_suffix.split('_', 1)
                    frag_pref = frag_pref + '_' + p
                else:
                    suffix = p_suffix
                frag_pref_ = frag_pref + '_'
                if frag_pref_ in fragment_prefixes:
                    if frag_pref_ == 'nlx_sub_': pref = 'nlx_subcell_'
                    elif frag_pref_ == 'nif_organ_': pref = 'nlx_organ_'
                    else:
                        pref = frag_pref_  # come on branch predictor you can do it!
                elif frag_pref_ == 'nlx_neuron_':  # special case
                    rest = 'nt_'
                    suffix = suffix[len(rest):]
                    pref = frag_pref_ + rest
                else:
                    yield spo, None, None
                    continue
            elif 'sao' in fragment:
                suffix = fragment[3:].strip('-')
                pref = 'sao'
            else:
                yield spo, None, None
                continue
            new_spo = rdflib.URIRef(NIFSTDBASE + pref + suffix)
            if new_spo != spo:
                rep = (new_spo, owl.sameAs, spo)
            else:
                rep = None
                print('Already converted', spo)
            yield new_spo, rep, pref
        except ValueError:  # there was no # so do not split
            yield spo, None, None
            continue
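
A hypothetical consumer for the generator above, showing how the three-tuples it yields are meant to be used (the real caller lives elsewhere; the names here are made up):

def swapUris(graph, ureps, fragment_prefixes):
    # rewrite every triple, collecting (new, owl:sameAs, old) replacements
    new_triples, replacements = [], []
    for trip in graph:
        out = list(swapUriSwitch(trip, ureps, fragment_prefixes))
        new_triples.append(tuple(spo for spo, _, _ in out))
        replacements.extend(rep for _, rep, _ in out if rep is not None)
    return new_triples, replacements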
Example #9
    def __init__(self, args, **kwargs):
        kwargs = {k: None if v == 'None' else v for k, v in kwargs.items()}
        self.__dict__.update(kwargs)
        self._updated = False
        if self.check_built:
            self.build_only = True
        self._host = HOST
        self._user = USER
        self.args = args
        self.ontload_args = {
            k: None if v == 'None' else v
            for k, v in args.items()
        }  # send them all!
        self.ontload_args['scigraph'] = self.services
        if self.all or self.graph:
            self.ontload_args['graph'] = True
        self.ontload_args.update({
            'imports': None,
            'chain': None,
            'extra': None,
            '<ontologies>': []
        })
        mode = [
            k for k, v in self.args.items()
            if not k.startswith('-') and not k.startswith('<') and v
        ]
        self.mode = mode[0] if mode else None
        self.build_services_config(
        )  # needed to update self.graph_folder  XXX hack fixme
        self._init_more()

        self.same_remotes = False
        if self.local and self.build_only:
            if self.check_built:
                self.local_dispatch()
            return
        elif self.build_host == self.services_host and self.build_user == self.services_user:
            self.same_remotes = True
            # the executor is different from the remotes
            if self.build_host != self._host and not self.check_built:
                #self._host = self.build_host
                #self._user = self.build_user
                self._building = False
                for name, obj in inspect.getmembers(self):
                    continue  # TODO there is a bug here with executor/build boundaries
                    # I think the issue is with the way we are calling anyMembers
                    if inspect.ismethod(obj) and anyMembers(
                            name, 'config', 'services', 'graph', 'remote'):

                        @wraps(obj)
                        def mutex_on_ssh(*args,
                                         func=obj,
                                         **kwargs):  # ah late binding hacks
                            if not self._building:
                                self._building = True
                                out = func(*args, **kwargs)
                                self._building = False
                                if out.startswith('('):
                                    #out = f'"{out[1:-1]}"'
                                    out = out[1:-1]
                                print('YAY FOR ONLY ONE SSH!')
                                return out
                                #return f'ssh {self._user}@{self._host} {out}'
                            else:
                                return func(*args, **kwargs)

                        setattr(self, name, mutex_on_ssh)
            elif not self.local:
                print('WARNING: all servers are equivalent to localhost '
                      'but you are running without --local. Did you mean to?')
        elif self.build_host == self._host and self.build_user == self._user:
            if not self.local:
                print('WARNING: all servers are equivalent to localhost '
                      'but you are running without --local. Did you mean to?')
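
The func=obj keyword default in mutex_on_ssh is the usual late-binding workaround: closures capture variables, not values, so without it every wrapper would see the last obj from the loop. In isolation:

# a default argument freezes the value per iteration; a bare closure does not
fs = [lambda x, i=i: x + i for i in range(3)]
assert [f(0) for f in fs] == [0, 1, 2]
bad = [lambda x: x + i for i in range(3)]
assert [f(0) for f in bad] == [2, 2, 2]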
Example #10
def would_you_like_to_know_more_question_mark():

    # resolving differences between classes
    more_ids = set((
        'http://uri.neuinfo.org/nif/nifstd/readable/ChEBIid',
        'http://uri.neuinfo.org/nif/nifstd/readable/GOid',
        'http://uri.neuinfo.org/nif/nifstd/readable/MeshUid',
        'http://uri.neuinfo.org/nif/nifstd/readable/PMID',
        'http://uri.neuinfo.org/nif/nifstd/readable/UmlsCui',
        'http://uri.neuinfo.org/nif/nifstd/readable/bamsID',
        'http://uri.neuinfo.org/nif/nifstd/readable/bonfireID',
        'http://uri.neuinfo.org/nif/nifstd/readable/cell_ontology_ID',
        'http://uri.neuinfo.org/nif/nifstd/readable/definingCitationID',
        'http://uri.neuinfo.org/nif/nifstd/readable/definingCitationURI',
        'http://uri.neuinfo.org/nif/nifstd/readable/emapMouseStageDataID',
        'http://uri.neuinfo.org/nif/nifstd/readable/emapMouseStageDiagramID',
        'http://uri.neuinfo.org/nif/nifstd/readable/externalSourceId',
        'http://uri.neuinfo.org/nif/nifstd/readable/externalSourceURI',
        'http://uri.neuinfo.org/nif/nifstd/readable/gbifID',
        'http://uri.neuinfo.org/nif/nifstd/readable/gbifTaxonKeyID',
        'http://uri.neuinfo.org/nif/nifstd/readable/gene_Ontology_ID',
        #'http://uri.neuinfo.org/nif/nifstd/readable/hasExternalSource',
        'http://uri.neuinfo.org/nif/nifstd/readable/hasGenbankAccessionNumber',
        'http://uri.neuinfo.org/nif/nifstd/readable/imsrStandardStrainName',
        'http://uri.neuinfo.org/nif/nifstd/readable/isReplacedByClass',
        'http://uri.neuinfo.org/nif/nifstd/readable/jaxMiceID',
        'http://uri.neuinfo.org/nif/nifstd/readable/ncbiTaxID',
        'http://uri.neuinfo.org/nif/nifstd/readable/neuronamesID',
        'http://uri.neuinfo.org/nif/nifstd/readable/nifID',
        'http://uri.neuinfo.org/nif/nifstd/readable/sao_ID',
        'http://uri.neuinfo.org/nif/nifstd/readable/umls_ID',
        'http://www.geneontology.org/formats/oboInOwl#id',
    ))

    outside = []
    eee = {}
    resolver_not_ilx_only_but_not_in_scigraph = set()  # resources.ttl
    _res = Graph().parse((gitf / 'NIF-Ontology/ttl/resources.ttl').as_posix(), format='turtle')
    reslookup = {uri:[l] for uri, l in _res.subject_objects(rdfs.label)}
    for uri in chain(h_uris, resolver_not_ilx_only):
        if 'uri.neuinfo.org' in uri:
            try:
                meta = sgg.getNode(uri.toPython())['nodes'][0]['meta']
                asdf = {hng.qname(k):v for k, v in meta.items() if k in more_ids}
            except TypeError:
                resolver_not_ilx_only_but_not_in_scigraph.add(uri)  # resources.ttl ;)
                if uri in reslookup:  # no differentia
                    asdf = False
                else:
                    asdf = False
                    print('WTF', uri)
            if asdf:
                #print(uri, asdf)
                eee[uri] = asdf
                for l in asdf.values():
                    for e in l:
                        outside.append(e)

    outside_dupes = [v for v, c in Counter(outside).most_common() if c > 1]
    eee_dupes = {k:v for k, v in eee.items() if anyMembers(outside_dupes, *(e for l in v.values() for e in l))}

    #for uri, meta in sorted(eee_dupes.items(), key=lambda a:sorted(a[1].values())):
        #print(uri.toPython(), sorted((e.replace('PMID: ', 'PMID:'), k) for k, l in meta.items() for e in l))


    # attempt to deal with label mappings
    iexisting = defaultdict(set)
    iiexisting = {}
    for i, existing in zip(datal('ilx'), datal('iri')):
        #if 'uri.neuinfo.org' in existing:
        if 'interlex.org' not in existing and 'neurolex.org' not in existing:
            iexisting[i].add(URIRef(existing))
            iiexisting[URIRef(existing)] = i
    iexisting = {**iexisting}

    _ilabs = {k:l for k, l in zip(datal('ilx'), datal('label'))}
    def inner(iri):
        resp = sgv.findById(iri)
        if resp is not None:
            l = resp['labels']
        else:
            l = [] #_ilabs[iiexisting[iri]] + '** already in ilx **']
            #print('trouble?', iri)  # ilx only
        return iri, l

    #labs = {k:v[0] if v else '<--NO-LABEL-->' for k, v in Async()(deferred(inner)(id_) for id_ in chain(h_uris, (e for s in iexisting.values() for e in s)))}
    labs = {k:v[0] if v else '<--NO-LABEL-->' for k, v in Async()(deferred(inner)(id_) for id_ in h_uris)}
    ilabs = {k:l.lower() for k, l in zip(datal('ilx'), datal('label'))}
    iilabs = {v:k for k, v in ilabs.items()}
    assert len(ilabs) == len(iilabs)
    missing_map = {k:iilabs[v.lower()] for k, v in labs.items() if v and v.lower() in iilabs}  # XXX this is not valid

    missing_existing = {i:[m, *iexisting[i]] for m, i in missing_map.items() if i in iexisting}

    missing_equivs = {next(iter(iexisting[i])):i for m, i in missing_map.items() if i in iexisting}

    eid = NIFRID.externalSourceId.toPython()
    ded = owl.deprecated.toPython()
    # SP: -> swissprot vs uniprot
    mmr = []
    proto_mmr_1_to_1 = {}
    arrr = defaultdict(set)
    uniprot_iuphar = set()
    for uri, ilx_frag in {**missing_equivs, **missing_map}.items():
        uri = URIRef(uri)
        try:
            meta = sgg.getNode(uri.toPython())['nodes'][0]['meta']
        except TypeError:
            # just ignore these, they are ilx only :/
            meta = {}
        if eid in meta:
            src = meta[eid][0]
            if src.startswith('SP:'):
                src = tc.yellow(src.replace('SP:', 'http://www.uniprot.org/uniprot/'))
            #elif src.startswith('IUPHAR:'):
                #pass
            #else:
                #src = 'TODO'
        elif ded in meta and meta[ded]:
            src = tc.red('ded ')
        else:
            src = 'TODO'
        val = labs[uri] if uri in labs else _ilabs[ilx_frag] + ' **'
        if uri in eee:
            differentia = str(eee[uri])
            for v in eee[uri].values():
                for e in v:
                    arrr[e].add(uri)
                    if 'SP:' in e or 'IUPHAR:' in e:
                        uniprot_iuphar.add(uri)
        else:
            differentia = ''

        if uri in _ilx and uri in all_uris:
            ruri = SGG[hng.qname(uri)]
            ruri = tc.blue(f'{ruri:<60}')
        else:
            ruri = uri
            ruri = f'{ruri:<60}'

        v = ' '.join((f'{val:<60}',
                      src,
                      ruri,
                      ilxb[ilx_frag],
                      differentia))
        mmr.append(v)
        proto_mmr_1_to_1[uri] = v
        src = None

    arrr = {**arrr}
    arrr_not_1_to_1 = {k:v for k, v in arrr.items() if len(v) > 1}
    #arrr_n11_uris = set((u.toPython() for v in arrr_not_1_to_1.values() for u in v))
    arrr_n11_uris = set.union(*arrr_not_1_to_1.values())
    mmr_1_to_1 = {k:v for k, v in proto_mmr_1_to_1.items() if k not in arrr_n11_uris}
    no_uniprot = {k:v for k, v in proto_mmr_1_to_1.items() if k not in uniprot_iuphar}
    arrr_n11_text = '\n'.join(f'{k:<15} {sorted(_.toPython() for _ in v)}' for k, v in arrr_not_1_to_1.items())
    mmr.sort()
    mmr_text = '\n'.join(mmr)

    mmr_1_to_1_text = '\n'.join(sorted(mmr_1_to_1.values()))

    no_uniprot_text = '\n'.join(sorted(no_uniprot.values()))
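
The duplicate detection used for outside_dupes above is a compact Counter idiom; in isolation:

from collections import Counter
outside = ['PMID:1', 'PMID:2', 'PMID:1']
dupes = [v for v, c in Counter(outside).most_common() if c > 1]
assert dupes == ['PMID:1']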
Example #11
def main():
    from docopt import docopt
    args = docopt(__doc__, version='rridxp 0.0.0')
    print(args)
    from scibot.release import get_annos, Curation, SamePMID, MultiplePMID, MultipleDOI, MPP, MPD
    annos = get_annos()
    [Curation(a, annos) for a in annos]

    def midr():
        mp = multiIssue(MultiplePMID(Curation))
        md = multiIssue(MultipleDOI(Curation))
        # filtering by url first removes any detectable instances of multiple dois/pmids
        #mpp = multiIssue(MPP(Curation))
        #mpd = multiIssue(MPD(Curation))
        with open('multiple-pmids.json', 'wt') as f:
            json.dump(mp, f, sort_keys=True, indent=4, cls=Encode)
        with open('multiple-dois.json', 'wt') as f:
            json.dump(md, f, sort_keys=True, indent=4, cls=Encode)

    if args['multi-id-report']:
        midr()

    elif args['csv']:
        substrings = args['<filter>']  # ['MGI', 'IMSR']
        if substrings:
            ssj = '-'.join(ss.lower() for ss in substrings) + '-'
        else:
            substrings = ['']
            ssj = 'all-'

        pmids2 = SamePMID(
            set(annotation for paper in Curation._papers.values()
                for rrid, annotations in paper.items()
                if rrid is not None and anyMembers(rrid, *substrings)
                for annotation in annotations))

        now = UTCNOW()
        rows = [[
            'PMID', 'DOI', 'URI', 'shareLink', 'exact', 'rrid', 'public_tags'
        ]]
        rows += sorted(
            deNone(anno.pmid, anno.doi, anno.uri, anno.shareLink, anno.exact,
                   anno.rrid, ','.join(
                       [t for t in anno.public_tags if 'RRID:' not in t]))
            for pmid, papers in pmids2.items() for rrids in papers.values()
            for annos in rrids.values() for anno in annos)
        with open(f'{ssj}rrids-{now}.csv', 'wt') as f:
            csv.writer(f, lineterminator='\n').writerows(rows)

        nomatch = [[
            'PMID', 'DOI', 'URI', 'shareLink', 'exact', 'rrid', 'public_tags'
        ]]
        nomatch += sorted(
            deNone(anno.pmid, anno.doi, anno.uri, anno.shareLink, anno.exact,
                   anno.rrid, ','.join(
                       [t for t in anno.public_tags if 'RRID:' not in t]))
            for pmid, papers in pmids2.items() for rrids in papers.values()
            for annos in rrids.values() for anno in annos
            if anno.exact and anno.rrid and anno.exact not in anno.rrid)

        with open(f'{ssj}rrids-nomatch-{now}.csv', 'wt') as f:
            csv.writer(f, lineterminator='\n').writerows(nomatch)

    if args['--debug']:
        embed()
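
deNone, Encode, and UTCNOW come from the surrounding scibot modules; a sketch of deNone consistent with how the rows are built above (assumed behavior, not the actual implementation):

def deNone(*things):
    # assumed: blank out missing fields so rows sort and serialize cleanly
    return tuple('' if thing is None else thing for thing in things)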