def get_pmid_from_url(url):
    """Return a ``'PMID:<suffix>'`` curie for a PubMed/EuropePMC abstract url.

    Returns None (implicitly) for any url that is not from one of the two
    recognized hosts.  The suffix is whatever follows the last slash.
    """
    pubmed_patterns = ('www.ncbi.nlm.nih.gov/pubmed/',
                       'europepmc.org/abstract/MED/')
    if anyMembers(url, *pubmed_patterns):
        # TODO validate the suffix
        suffix = url.rsplit('/', 1)[-1]
        return 'PMID:' + suffix
def main():
    """Build a protege catalog-v001.xml mapping remote NIF ontology iris to
    the corresponding files in a local checkout of the ontology repo.

    Reads cli options via docopt (``--big``, ``--ontology-local-repo``,
    ``--debug``) and writes the catalog to the system temp directory.
    """
    from docopt import docopt
    args = docopt(__doc__, version='ont-catalog 0.0.1')
    dobig = args['--big']
    remote_base = 'http://ontology.neuinfo.org/NIF/ttl/'
    olr = Path(args['--ontology-local-repo'])
    local_base = (olr / 'ttl').as_posix() + '/'

    # list of all nif ontologies
    #onts = [f for f in fs if f.endswith('.ttl') or f.endswith('.owl') and 'NEMO_' not in f]
    repo = Repo(olr)
    repo_path = Path(olr)
    tracked_files = [
        (repo_path / f).as_posix()
        # FIXME missing scicrunch-registry.ttl
        for f in repo.git.ls_files('--', 'ttl/').split('\n')
        if f.endswith('.ttl') or f.endswith('.owl')]
    #_ = [print(f) for f in fs]
    extra_files = []  # TODO pass in via cli?
    # (remote iri, repo-relative fragment) pairs; split on '/ttl/' once so
    # nested paths under ttl/ keep their full fragment
    mapping = [(remote_base + fragment, fragment)
               for file in tracked_files + extra_files
               for _, fragment in (file.split('/ttl/', 1),)]

    # check for mismatched import and ontology iris
    itrips = local_imports(remote_base, local_base, tracked_files,
                           readonly=True,
                           dobig=dobig)  # XXX these files are big and slow, run at own peril
    sa = {os.path.basename(o): s for s, p, o in itrips if 'sameAs' in p}
    # FIXME should be able to do this by checking what is tracked by git...
    externals = ('CogPO.owl', 'NEMO_vv2.98.owl', 'cogat_v0.3.owl', 'doid.owl',
                 'ero.owl', 'pato.owl', 'pr.owl', 'ro_bfo1-1_bridge.owl',
                 'uberon.owl')
    for f in tracked_files + extra_files:
        if '/external/' in f and anyMembers(f, *externals):
            basename = os.path.basename(f)
            if basename in sa:
                target = sa[basename]
                if 'external' not in target:
                    mapping.append((target, 'external/' + basename))

    # make a protege catalog file to simplify life
    # FIX: the template previously read uri="(unknown)" so the filename=
    # argument passed to .format() was silently dropped and every catalog
    # entry pointed at a literal placeholder instead of the local file
    uriline = ' <uri id="User Entered Import Resolution" name="{ontid}" uri="{filename}"/>'
    xmllines = (['<?xml version="1.0" encoding="UTF-8" standalone="no"?>',
                 '<catalog prefer="public" xmlns="urn:oasis:names:tc:entity:xmlns:xml:catalog">',]
                + [uriline.format(ontid=ont, filename=file)
                   for ont, file in sorted(mapping)]
                + ['</catalog>'])
    xml = '\n'.join(xmllines)
    # FIX: tempfile.tempdir is None until something calls gettempdir(), which
    # would have produced the path 'None/nif-catalog-v001.xml'
    with open(f'{tempfile.gettempdir()}/nif-catalog-v001.xml', 'wt') as f:
        f.write(xml)

    if args['--debug']:
        breakpoint()
def extract(og, ng, curie, mkdir=False):
    """Carry deprecation information from graph wrapper ``og`` into ``ng``.

    Two passes over the old graph:
      1. every subject marked owl:deprecated true gets a
         subPropertyOf/subClassOf link to owl:DeprecatedProperty/Class
         added to the new graph (unless the triple already exists in og);
      2. every oboInOwl:hasAlternativeId whose old id matches ``curie``
         (as 'curie:' or 'curie_') and is not already dead gets full
         deprecation + replacedBy triples in the new graph.

    og/ng appear to be project graph wrappers exposing .g (an rdflib
    Graph), .check_thing, .add_trip, .filename and .write -- TODO confirm
    exact contract.  Finally writes ng to disk, creating the parent
    directory first when mkdir=True.
    """
    graph = og.g
    properties = (owl.AnnotationProperty, owl.DatatypeProperty, owl.ObjectProperty)
    deads = [s for s in graph.subjects(owl.deprecated, rdflib.Literal(True))]
    for s in deads:
        types = set(o for o in graph.objects(s, rdf.type))
        if anyMembers(types, *properties):
            p, o = rdfs.subPropertyOf, owl.DeprecatedProperty
        elif owl.Class in types:
            p, o = rdfs.subClassOf, owl.DeprecatedClass
        else:
            continue  # don't bother with named individuals
        trip = (ng.check_thing(s), ng.check_thing(p), ng.check_thing(o))
        if trip not in og.g:  # only add if the old graph lacks it
            ng.g.add(trip)

    # TODO cases where owl:deprecated is not used but sco owl:DeprecatedClass is...
    base_alts = list(graph.subject_objects(oboInOwl.hasAlternativeId))
    for replacedByClass, oldClassString in base_alts:
        # only handle alternative ids that belong to this curie namespace
        if curie + ':' in oldClassString or curie + '_' in oldClassString:
            oldClassString = oldClassString.toPython()
            s = ng.check_thing(oldClassString)
            if s not in deads:  # already handled by the first pass otherwise
                types = set(o for o in graph.objects(replacedByClass, rdf.type))
                if anyMembers(types, *properties):
                    p, o = rdfs.subPropertyOf, owl.DeprecatedProperty
                elif owl.Class in types:
                    p, o = rdfs.subClassOf, owl.DeprecatedClass
                else:
                    continue  # don't bother with named individuals
                # mirror the replacement class's rdf:types onto the old id
                [ng.add_trip(s, rdf.type, o) for o in types]
                ng.add_trip(s, p, o)
                ng.add_trip(s, owl.deprecated, True)
                ng.add_trip(s, 'replacedBy:', replacedByClass)

    if mkdir:
        parent = Path(ng.filename).parent
        if not parent.exists():
            parent.mkdir(parents=True)

    ng.write()
def reverse_report():
    """Report which interlex classes fail to map back into the ontology.

    Parses /tmp/interlex.ttl, flags classes with no dbxref into a known
    namespace, buckets them (annotation/object properties, drugbank,
    t3db), prints a summary, and returns
    (not_in_ontology, drugbank, unaccounted).
    """
    ilx = Graph()
    ilx.parse('/tmp/interlex.ttl', format='turtle')
    known_sources = ('uri.neuinfo.org', 'GO_', 'CHEBI_', 'PR_',
                     'PATO_', 'HP_', 'OBI_', 'DOID_', 'COGPO_',
                     'CAO_', 'UBERON_', 'NCBITaxon_', 'SO_',
                     'IAO_')  # FIXME do we really import HP?
    not_in_ontology = set()
    annotations = set()
    relations = set()
    drugbank = set()
    t3db = set()
    for cls in ilx.subjects(rdf.type, owl.Class):
        mapped = False
        for xref in ilx.objects(cls, oboInOwl.hasDbXref):
            if anyMembers(xref, *known_sources):
                mapped = True
            # NB the elif chain below runs once per xref on purpose;
            # property membership is checked before the xref buckets
            if (cls, rdf.type, owl.AnnotationProperty) in ilx:
                # FIXME for troy these need to be cleared up
                annotations.add(cls)
            elif (cls, rdf.type, owl.ObjectProperty) in ilx:
                relations.add(cls)
            elif 'drugbank' in xref:
                drugbank.add(cls)
            elif 't3db.org' in xref:
                t3db.add(cls)
        if not mapped:
            not_in_ontology.add(cls)

    # restrict every bucket to the unmapped classes
    drugbank &= not_in_ontology
    t3db &= not_in_ontology
    annotations &= not_in_ontology
    relations &= not_in_ontology
    unaccounted = (not_in_ontology
                   - drugbank - t3db
                   - annotations - relations)
    report = (
        f'Total {len(not_in_ontology)}\n'
        f'annotations {len(annotations)}\n'
        f'relations {len(relations)}\n'
        f'drugbank {len(drugbank)}\n'
        f't3db {len(t3db)}\n'
        f'unaccounted {len(unaccounted)}\n'
    )
    print(report)
    return not_in_ontology, drugbank, unaccounted
def uri_normalization(uri):
    """ NOTE: this does NOT produce uris

    Collapse the many url variants under which publishers serve the same
    article (abstract/full/pdf views, tracking query params, hypothes.is
    pdf-viewer wrapping) into a single scheme-less key for deduplication.
    Raises TypeError for unsplittable garbage not in bad_uris.
    """
    try:
        # strip hypothesis extension prefix
        if uri.startswith('chrome-extension://bjfhmglciegochdpefhhlphglcehbmek/content/web/viewer.html?file='):
            junk, uri = uri.split('=', 1)

        # universal fixes
        no_fragment, *_frag = uri.rsplit('#', 1)
        no_trailing_slash = no_fragment.rstrip('/')  # annoying
        _scheme, no_scheme = no_trailing_slash.split('://', 1)

        # special cases
        if 'frontiersin.org' in no_scheme:
            # og:url on frontiers is incorrect
            no_scheme = no_scheme.replace('article/', 'articles/')
        elif 'fasebj.org' in no_scheme:  # FIXME this one has _all_ the variants :/
            no_scheme = (no_scheme
                         .replace('.abstract', '')
                         .replace('.full', '')
                         .replace('.pdf', ''))
        elif no_scheme.endswith('?needAccess=true'):
            no_scheme = no_scheme[:-len('?needAccess=true')]
        elif '?systemMessage' in no_scheme:
            no_scheme, junk = no_scheme.rsplit('?systemMessage', 1)

        # specific fixes
        if anyMembers(no_scheme,
                      'acs.org',
                      'ahajournals.org',
                      'biologicalpsychiatryjournal.com',
                      'ebiomedicine.com',
                      'fasebj.org',
                      'frontiersin.org',
                      'future-science.com',
                      'hindawi.com',
                      'ieee.org',
                      'jclinepi.com',
                      'jpeds.com',
                      'liebertpub.com',
                      'mitpressjournals.org',
                      'molbiolcell.org',
                      'molmetab.com',
                      'neurobiologyofaging.org',
                      'physiology.org',
                      'sagepub.com',
                      'sciencedirect.com',
                      'tandfonline.com',
                      'theriojournal.com',
                      'wiley.com',):
            # NOTE not all the above hit all of these
            # almost all still resolve
            normalized = (no_scheme
                          .replace('/abstract', '')
                          .replace('/abs', '')
                          .replace('/fulltext', '')
                          .replace('/full', '')
                          .replace('/pdf', ''))
        #elif ('sciencedirect.com' in no_scheme):
            #normalized = (no_scheme
                          #.replace('/abs', ''))
        elif ('cell.com' in no_scheme):
            normalized = (no_scheme  # FIXME looks like cell uses /abstract in og:url
                          .replace('/abstract', '/XXX')
                          .replace('/fulltext', '/XXX'))
        elif 'jneurosci.org' in no_scheme:
            # TODO content/early -> resolution_chain(doi)
            normalized = (no_scheme
                          .replace('.short', '')
                          .replace('.long', '')
                          .replace('.full', '')
                          .replace('.pdf', '')  # note .full.pdf is a thing
                          )
        elif 'pnas.org' in no_scheme:
            normalized = (no_scheme
                          .replace('.short', '')
                          .replace('.long', '')
                          .replace('.full', ''))
        elif 'mdpi.com' in no_scheme:
            normalized = (no_scheme
                          .replace('/htm', ''))
        elif 'f1000research.com' in no_scheme:
            # you should be ashamed of yourselves for being in here for this reason
            normalized, *maybe_version = no_scheme.rsplit('/v', 1)
        elif 'academic.oup.com' in no_scheme:
            normalized, *maybesr = no_scheme.rsplit('?searchresult=', 1)
            _normalized, maybe_junk = normalized.rsplit('/', 1)
            numbers = '0123456789'
            # various ways to detect the human readable junk after the id
            if (maybe_junk[0] not in numbers or
                maybe_junk[-1] not in numbers or
                '-' in maybe_junk or
                len(maybe_junk) > 20):
                normalized = _normalized
        elif anyMembers(no_scheme,
                        'jci.org',
                        'nature.com'):
            # cases where safe to remove query fragment
            normalized, *_query = no_scheme.rsplit('?', 1)
            normalized, *table_number = normalized.rsplit('/tables/', 1)
        elif 'pubmed/?term=' in no_scheme and noneMembers(no_scheme, ' ', '+'):
            normalized = no_scheme.replace('?term=', '')
        elif 'nih.gov/pubmed/?' in no_scheme:
            # FIXME scibot vs client norm?
            normalized = no_scheme.replace(' ', '+')
        elif 'govhttp' in no_scheme:
            # lol oh dear -- repair a doubled 'http...govhttp...' paste
            hrm, oops = no_scheme.split('govhttp')
            ded, wat = oops.split('//', 1)
            blargh, suffix = wat.split('/', 1)
            normalized = hrm + 'gov/pmc/' + suffix
        elif 'table/undtbl' in no_scheme:
            normalized, table_number = no_scheme.rsplit('table/undtbl')
        elif anyMembers(no_scheme,
                        'index.php?',):
            # cases where we just use hypothes.is normalization
            _scheme, normalized = uri_normalize(uri).split('://')  # FIXME h dependency
        else:
            normalized = no_scheme

        # NOTE(review): the bare string literals below are inert no-op
        # expressions, apparently kept as examples of problem urls
        'onlinelibrary.wiley.com/doi/10.1002/cne.23727?wol1URL=/doi/10.1002/cne.23727®ionCode=US-CA&identityKey=e2523300-b934-48c9-b08e-940de05d7335'
        'www.jove.com/video/55441/?language=Japanese'
        'www.nature.com/neuro/journal/v19/n5/full/nn.4282.html'
        'www.nature.com/cr/journal/vaop/ncurrent/full/cr201669a.html'
        'https://www.nature.com/articles/cr201669'
        #{'www.ingentaconnect.com/content/umrsmas/bullmar/2017/00000093/00000002/art00006':
        #[OntId('DOI:10.5343/bms.2016.1044'), OntId('DOI:info:doi/10.5343/bms.2016.1044')]}

        # pmid extract from pmc
        #<meta name="citation_pmid" content="28955177">

        return normalized

    except ValueError as e:  # split fail
        pdf_prefix = 'urn:x-pdf:'
        if uri.startswith(pdf_prefix):
            return uri
        elif uri in bad_uris:  # bad_uris: module-level blacklist -- TODO confirm
            print('AAAAAAAAAAAAAAAAAAAAAAAAAAA', uri)
            return 'THIS URI IS GARBAGE AND THIS IS ITS NORMALIZED FORM'
        else:
            raise TypeError(uri) from e
# check for mismatched import and ontology iris
# NOTE(review): top-level variant of the catalog-building logic; relies on
# remote_base, local_base, onts, fs and mapping defined earlier in the file
itrips = local_imports(remote_base, local_base, onts,
                       readonly=True,
                       dobig=True)
sa = {os.path.basename(o): s for s, p, o in itrips if 'sameAs' in p}
# FIXME should be able to do this by checking what is tracked by git...
externals = ('CogPO.owl', 'NEMO_vv2.98.owl', 'cogat_v0.3.owl', 'doid.owl',
             'ero.owl', 'pato.owl', 'pr.owl', 'ro_bfo1-1_bridge.owl',
             'uberon.owl')
for f in fs:
    if '/external/' in f and anyMembers(f, *externals):
        basename = os.path.basename(f)
        if basename in sa:
            target = sa[basename]
            if 'external' not in target:
                mapping.append((target, 'external/' + basename))

# make a protege catalog file to simplify life
# FIX: template previously read uri="(unknown)", dropping the filename=
# argument passed to .format() below
uriline = ' <uri id="User Entered Import Resolution" name="{ontid}" uri="{filename}"/>'
xmllines = (['<?xml version="1.0" encoding="UTF-8" standalone="no"?>',
             '<catalog prefer="public" xmlns="urn:oasis:names:tc:entity:xmlns:xml:catalog">',]
            + [uriline.format(ontid=ont, filename=file)
               for ont, file in sorted(mapping)]
            + ['</catalog>'])
xml = '\n'.join(xmllines)
with open('/tmp/nif-catalog-v001.xml', 'wt') as f:
    f.write(xml)  # FIX: restored write; the with-block body was truncated
def already_released_or_skipped(self):
    """True when at least one reply to this annotation carries REPLY_TAG."""
    for reply in self.replies:
        if anyMembers(reply.tags, self.REPLY_TAG):
            return True
    return False
def swapUriSwitch(trip, ureps, fragment_prefixes):
    """Yield (new_spo, replacement_triple_or_None, prefix_or_None) for each
    element of the triple ``trip``, rewriting legacy NIFSTD fragment uris
    onto NIFSTDBASE.

    ureps: explicit old-uri -> new-uri replacement map.
    fragment_prefixes: the set of known 'frag_' style prefixes.
    Non-URIRef elements and unrecognized uris pass through unchanged as
    (spo, None, None).
    """
    for spo in trip:
        if not isinstance(spo, rdflib.URIRef):
            # literals / bnodes pass through untouched
            yield spo, None, None
            continue
        elif spo in ureps:
            # explicit replacement wins over any fragment heuristics
            new_spo = ureps[spo]
            rep = (new_spo, owl.sameAs, spo)
            if 'nlx_' in new_spo:
                pref = 'nlx_'
            elif '/readable/' in new_spo:
                pref = 'NIFRID'
            else:
                pref = 'NIFSTD'
            yield new_spo, rep, pref
            continue
        elif anyMembers(spo,  # backend refactor
                        'BIRNLex_annotation_properties.owl#',
                        'OBO_annotation_properties.owl#'):
            # annotation properties moved to the readable/ namespace
            _, suffix = spo.rsplit('#', 1)
            new_spo = rdflib.URIRef(
                os.path.join(NIFSTDBASE, 'readable', suffix))
            rep = (new_spo, owl.sameAs, spo)
            pref = 'NIFRID'
            yield new_spo, rep, pref
            continue

        try:
            uri_pref, fragment = spo.rsplit('#', 1)
            if '_' in fragment:
                # fragments look like '<prefix>_<suffix>' but the prefix may
                # itself contain one underscore (e.g. nlx_organ_12345)
                frag_pref, p_suffix = fragment.split('_', 1)
                if not p_suffix[0].isdigit():
                    p, suffix = p_suffix.split('_', 1)
                    frag_pref = frag_pref + '_' + p
                else:
                    suffix = p_suffix
                frag_pref_ = frag_pref + '_'
                if frag_pref_ in fragment_prefixes:
                    if frag_pref_ == 'nlx_sub_':  # renamed namespaces
                        pref = 'nlx_subcell_'
                    elif frag_pref_ == 'nif_organ_':
                        pref = 'nlx_organ_'
                    else:
                        pref = frag_pref_  # come on branch predictor you can do it!
                elif frag_pref_ == 'nlx_neuron_':  # special case
                    rest = 'nt_'
                    suffix = suffix[len(rest):]
                    pref = frag_pref_ + rest
                else:
                    yield spo, None, None
                    continue
            elif 'sao' in fragment:
                # sao ids: strip the 'sao' prefix and stray hyphens
                suffix = fragment[3:].strip('-')
                pref = 'sao'
            else:
                yield spo, None, None
                continue
            new_spo = rdflib.URIRef(NIFSTDBASE + pref + suffix)
            if new_spo != spo:
                rep = (new_spo, owl.sameAs, spo)
            else:
                rep = None
                print('Already converted', spo)
            yield new_spo, rep, pref
        except ValueError:  # there was no # so do not split
            yield spo, None, None
            continue
def __init__(self, args, **kwargs):
    """Configure a deploy run from docopt ``args`` plus option ``kwargs``.

    kwargs are normalized ('None' string -> None) and dumped straight into
    the instance dict, so attributes like build_host / services_host /
    build_user / services_user / local / check_built come from the caller.
    """
    kwargs = {k: None if v == 'None' else v for k, v in kwargs.items()}
    self.__dict__.update(kwargs)
    self._updated = False
    if self.check_built:
        self.build_only = True
    self._host = HOST
    self._user = USER
    self.args = args
    self.ontload_args = {
        k: None if v == 'None' else v for k, v in args.items()
    }  # send them all!
    self.ontload_args['scigraph'] = self.services
    if self.all or self.graph:
        self.ontload_args['graph'] = True
    self.ontload_args.update({
        'imports': None,
        'chain': None,
        'extra': None,
        '<ontologies>': []
    })
    # first truthy positional-style key in args is the command mode
    mode = [
        k for k, v in self.args.items()
        if not k.startswith('-') and not k.startswith('<') and v
    ]
    self.mode = mode[0] if mode else None
    self.build_services_config()  # needed to update self.graph_folder XXX hack fixme
    self._init_more()
    self.same_remotes = False
    if self.local and self.build_only:
        if self.check_built:
            self.local_dispatch()
        return
    # FIX: was `self.build_user == self.build_user` (always True), making
    # the user comparison vacuous; compare against the services user to
    # match the host comparison
    elif (self.build_host == self.services_host
          and self.build_user == self.services_user):
        self.same_remotes = True
        # the executor is different from the remotes
        if self.build_host != self._host and not self.check_built:
            #self._host = self.build_host
            #self._user = self.build_user
            self._building = False
            for name, obj in inspect.getmembers(self):
                continue  # TODO there is a bug here with executor/build boundaries
                # i think the issues is with the way we are calling anyMembers
                # NOTE: the code below is intentionally disabled by the
                # continue above pending the fix described in the TODO
                if inspect.ismethod(obj) and anyMembers(
                        name, 'config', 'services', 'graph', 'remote'):
                    @wraps(obj)
                    def mutex_on_ssh(*args, func=obj, **kwargs):  # ah late binding hacks
                        if not self._building:
                            self._building = True
                            out = func(*args, **kwargs)
                            self._building = False
                            if out.startswith('('):
                                #out = f'"{out[1:-1]}"'
                                out = out[1:-1]
                            print('YAY FOR ONLY ONE SSH!')
                            return out
                            #return f'ssh {self._user}@{self._host} {out}'
                        else:
                            return func(*args, **kwargs)
                    setattr(self, name, mutex_on_ssh)
        elif not self.local:
            print('WARNING: all servers are equivalent to localhost '
                  'but you are running without --local. Did you mean to?')
    elif self.build_host == self._host and self.build_user == self._user:
        if not self.local:
            print('WARNING: all servers are equivalent to localhost '
                  'but you are running without --local. Did you mean to?')
def would_you_like_to_know_more_question_mark():
    # resolving differences between classes
    # NOTE(review): depends on many module-level names (h_uris,
    # resolver_not_ilx_only, sgg, sgv, hng, datal, _ilx, all_uris, SGG,
    # ilxb, gitf, tc, Async, deferred) defined elsewhere in this file
    more_ids = set((
        'http://uri.neuinfo.org/nif/nifstd/readable/ChEBIid',
        'http://uri.neuinfo.org/nif/nifstd/readable/GOid',
        'http://uri.neuinfo.org/nif/nifstd/readable/MeshUid',
        'http://uri.neuinfo.org/nif/nifstd/readable/PMID',
        'http://uri.neuinfo.org/nif/nifstd/readable/UmlsCui',
        'http://uri.neuinfo.org/nif/nifstd/readable/bamsID',
        'http://uri.neuinfo.org/nif/nifstd/readable/bonfireID',
        'http://uri.neuinfo.org/nif/nifstd/readable/cell_ontology_ID',
        'http://uri.neuinfo.org/nif/nifstd/readable/definingCitationID',
        'http://uri.neuinfo.org/nif/nifstd/readable/definingCitationURI',
        'http://uri.neuinfo.org/nif/nifstd/readable/emapMouseStageDataID',
        'http://uri.neuinfo.org/nif/nifstd/readable/emapMouseStageDiagramID',
        'http://uri.neuinfo.org/nif/nifstd/readable/externalSourceId',
        'http://uri.neuinfo.org/nif/nifstd/readable/externalSourceURI',
        'http://uri.neuinfo.org/nif/nifstd/readable/gbifID',
        'http://uri.neuinfo.org/nif/nifstd/readable/gbifTaxonKeyID',
        'http://uri.neuinfo.org/nif/nifstd/readable/gene_Ontology_ID',
        #'http://uri.neuinfo.org/nif/nifstd/readable/hasExternalSource',
        'http://uri.neuinfo.org/nif/nifstd/readable/hasGenbankAccessionNumber',
        'http://uri.neuinfo.org/nif/nifstd/readable/imsrStandardStrainName',
        'http://uri.neuinfo.org/nif/nifstd/readable/isReplacedByClass',
        'http://uri.neuinfo.org/nif/nifstd/readable/jaxMiceID',
        'http://uri.neuinfo.org/nif/nifstd/readable/ncbiTaxID',
        'http://uri.neuinfo.org/nif/nifstd/readable/neuronamesID',
        'http://uri.neuinfo.org/nif/nifstd/readable/nifID',
        'http://uri.neuinfo.org/nif/nifstd/readable/sao_ID',
        'http://uri.neuinfo.org/nif/nifstd/readable/umls_ID',
        'http://www.geneontology.org/formats/oboInOwl#id',
    ))

    outside = []
    eee = {}
    resolver_not_ilx_only_but_not_in_scigraph = set()
    # resources.ttl
    _res = Graph().parse((gitf / 'NIF-Ontology/ttl/resources.ttl').as_posix(),
                         format='turtle')
    reslookup = {uri: [l] for uri, l in _res.subject_objects(rdfs.label)}
    for uri in chain(h_uris, resolver_not_ilx_only):
        if 'uri.neuinfo.org' in uri:
            try:
                # collect external-id metadata from scigraph for this uri
                meta = sgg.getNode(uri.toPython())['nodes'][0]['meta']
                asdf = {hng.qname(k): v for k, v in meta.items()
                        if k in more_ids}
            except TypeError:
                # getNode returned None -> not in scigraph
                resolver_not_ilx_only_but_not_in_scigraph.add(uri)
                # resources.ttl ;)
                if uri in reslookup:  # no differentia
                    asdf = False
                else:
                    asdf = False
                    print('WTF', uri)
            if asdf:
                #print(uri, asdf)
                eee[uri] = asdf
                for l in asdf.values():
                    for e in l:
                        outside.append(e)

    outside_dupes = [v for v, c in Counter(outside).most_common() if c > 1]
    eee_dupes = {k: v for k, v in eee.items()
                 if anyMembers(outside_dupes,
                               *(e for l in v.values() for e in l))}
    #for uri, meta in sorted(eee_dupes.items(), key=lambda a:sorted(a[1].values())):
        #print(uri.toPython(), sorted((e.replace('PMID: ', 'PMID:'), k)
                                     #for k, l in meta.items() for e in l))

    # attempt to deal with label mappings
    iexisting = defaultdict(set)
    iiexisting = {}
    for i, existing in zip(datal('ilx'), datal('iri')):
        #if 'uri.neuinfo.org' in existing:
        if 'interlex.org' not in existing and 'neurolex.org' not in existing:
            iexisting[i].add(URIRef(existing))
            iiexisting[URIRef(existing)] = i
    iexisting = {**iexisting}
    _ilabs = {k: l for k, l in zip(datal('ilx'), datal('label'))}

    def inner(iri):
        # fetch scigraph labels for an iri; [] when the iri is ilx only
        resp = sgv.findById(iri)
        if resp is not None:
            l = resp['labels']
        else:
            l = []  #_ilabs[iiexisting[iri]] + '** already in ilx **']
            #print('trouble?', iri)  # ilx only
        return iri, l

    #labs = {k:v[0] if v else '<--NO-LABEL-->'
            #for k, v in Async()(deferred(inner)(id_)
                                #for id_ in chain(h_uris,
                                                 #(e for s in iexisting.values()
                                                  #for e in s)))}
    labs = {k: v[0] if v else '<--NO-LABEL-->'
            for k, v in Async()(deferred(inner)(id_) for id_ in h_uris)}
    ilabs = {k: l.lower() for k, l in zip(datal('ilx'), datal('label'))}
    iilabs = {v: k for k, v in ilabs.items()}
    assert len(ilabs) == len(iilabs)  # labels must be unique after lowering
    missing_map = {k: iilabs[v.lower()] for k, v in labs.items()
                   if v and v.lower() in iilabs}  # XXX this is not valid

    missing_existing = {i: [m, *iexisting[i]]
                        for m, i in missing_map.items() if i in iexisting}
    missing_equivs = {next(iter(iexisting[i])): i
                      for m, i in missing_map.items() if i in iexisting}

    eid = NIFRID.externalSourceId.toPython()
    ded = owl.deprecated.toPython()
    # SP: -> swissprot vs uniprot
    mmr = []
    proto_mmr_1_to_1 = {}
    arrr = defaultdict(set)
    uniprot_iuphar = set()
    for uri, ilx_frag in {**missing_equivs, **missing_map}.items():
        uri = URIRef(uri)
        try:
            meta = sgg.getNode(uri.toPython())['nodes'][0]['meta']
        except TypeError:
            meta = {}  # just ignore these, they are ilx only :/
        if eid in meta:
            src = meta[eid][0]
            if src.startswith('SP:'):
                src = tc.yellow(src.replace(
                    'SP:', 'http://www.uniprot.org/uniprot/'))
            #elif src.startswith('IUPHAR:'):
                #pass
            #else:
                #src = 'TODO'
        elif ded in meta and meta[ded]:
            src = tc.red('ded ')
        else:
            src = 'TODO'
        val = labs[uri] if uri in labs else _ilabs[ilx_frag] + ' **'
        if uri in eee:
            differentia = str(eee[uri])
            for v in eee[uri].values():
                for e in v:
                    arrr[e].add(uri)
                    if 'SP:' in e or 'IUPHAR:' in e:
                        uniprot_iuphar.add(uri)
        else:
            differentia = ''
        if uri in _ilx and uri in all_uris:
            ruri = SGG[hng.qname(uri)]
            ruri = tc.blue(f'{ruri:<60}')
        else:
            ruri = uri
            ruri = f'{ruri:<60}'
        v = ' '.join((f'{val:<60}', src, ruri, ilxb[ilx_frag], differentia))
        mmr.append(v)
        proto_mmr_1_to_1[uri] = v
        src = None

    arrr = {**arrr}
    arrr_not_1_to_1 = {k: v for k, v in arrr.items() if len(v) > 1}
    #arrr_n11_uris = set((u.toPython() for v in arrr_not_1_to_1.values() for u in v))
    arrr_n11_uris = set.union(*arrr_not_1_to_1.values())
    mmr_1_to_1 = {k: v for k, v in proto_mmr_1_to_1.items()
                  if k not in arrr_n11_uris}
    no_uniprot = {k: v for k, v in proto_mmr_1_to_1.items()
                  if k not in uniprot_iuphar}
    arrr_n11_text = '\n'.join(f'{k:<15} {sorted(_.toPython() for _ in v)}'
                              for k, v in arrr_not_1_to_1.items())
    mmr.sort()
    mmr_text = '\n'.join(mmr)
    mmr_1_to_1_text = '\n'.join(sorted(mmr_1_to_1.values()))
    no_uniprot_text = '\n'.join(sorted(no_uniprot.values()))
def main():
    """rridxp cli entry point: multi-id reports and rrid csv exports.

    Modes (from docopt): 'multi-id-report' dumps papers with multiple
    pmids/dois to json; 'csv' exports rrid annotation rows, optionally
    filtered by <filter> substrings.
    """
    from docopt import docopt
    args = docopt(__doc__, version='rridxp 0.0.0')
    print(args)
    from scibot.release import get_annos, Curation, SamePMID, MultiplePMID, MultipleDOI, MPP, MPD
    annos = get_annos()
    # side effect: populates Curation._papers
    [Curation(a, annos) for a in annos]

    def midr():
        # write the multiple pmid/doi reports as json
        mp = multiIssue(MultiplePMID(Curation))
        md = multiIssue(MultipleDOI(Curation))
        # filtering by url first removes any detectable instances of multiple dois/pmids
        #mpp = multiIssue(MPP(Curation))
        #mpd = multiIssue(MPD(Curation))
        with open('multiple-pmids.json', 'wt') as f:
            json.dump(mp, f, sort_keys=True, indent=4, cls=Encode)
        with open('multiple-dois.json', 'wt') as f:
            json.dump(md, f, sort_keys=True, indent=4, cls=Encode)

    if args['multi-id-report']:
        midr()
    elif args['csv']:
        substrings = args['<filter>']  # ['MGI', 'IMSR']
        if substrings:
            ssj = '-'.join(ss.lower() for ss in substrings) + '-'
        else:
            substrings = ['']  # empty substring matches every rrid
            ssj = 'all-'
        pmids2 = SamePMID(
            set(annotation
                for paper in Curation._papers.values()
                for rrid, annotations in paper.items()
                if rrid is not None and anyMembers(rrid, *substrings)
                for annotation in annotations))
        now = UTCNOW()
        rows = [[
            'PMID', 'DOI', 'URI', 'shareLink', 'exact', 'rrid', 'public_tags'
        ]]
        rows += sorted(
            deNone(anno.pmid, anno.doi, anno.uri, anno.shareLink, anno.exact,
                   anno.rrid, ','.join(
                       [t for t in anno.public_tags if 'RRID:' not in t]))
            for pmid, papers in pmids2.items()
            for rrids in papers.values()
            for annos in rrids.values()
            for anno in annos)
        with open(f'{ssj}rrids-{now}.csv', 'wt') as f:
            csv.writer(f, lineterminator='\n').writerows(rows)
        # second pass: rows where the annotated text does not match the rrid
        nomatch = [[
            'PMID', 'DOI', 'URI', 'shareLink', 'exact', 'rrid', 'public_tags'
        ]]
        nomatch += sorted(
            deNone(anno.pmid, anno.doi, anno.uri, anno.shareLink, anno.exact,
                   anno.rrid, ','.join(
                       [t for t in anno.public_tags if 'RRID:' not in t]))
            for pmid, papers in pmids2.items()
            for rrids in papers.values()
            for annos in rrids.values()
            for anno in annos
            if anno.exact and anno.rrid and anno.exact not in anno.rrid)
        with open(f'{ssj}rrids-nomatch-{now}.csv', 'wt') as f:
            csv.writer(f, lineterminator='\n').writerows(nomatch)

    if args['--debug']:
        embed()