def wrpy_pwy2pmids(self, fout_py):
    """Write a Python module holding a pathway-to-PubMed-IDs dictionary.

    The mapping is read from ``self.pubs['pw2pubs']``. Each value is iterated
    as 2-item pairs; only the first item of each pair (the ID) is written,
    de-duplicated and sorted, as a set literal under ``PWY2PMIDS``.

    Returns the mapping that was read, or None (after printing a notice and
    without touching ``fout_py``) when the mapping is empty.
    """
    pwy2pubs = self.pubs['pw2pubs']
    if not pwy2pubs:
        print(" NO items. Not writing {PY}".format(PY=fout_py))
        return
    row_fmt = " '{PW}' : {{{PMIDS}}},\n"
    with open(fout_py, 'w') as prt:
        prt_docstr_module('PubMed IDs for each Pathway', prt)
        num_with_ids = len(pwy2pubs)
        num_pathways = len(self.pw2info)
        prt.write(
            '\n# {N} of {M} Pathways are associated with PubMed IDs\n'.format(
                N=num_with_ids, M=num_pathways))
        prt.write(
            '# pylint: disable=line-too-long,too-many-lines,bad-continuation\n')
        prt.write('PWY2PMIDS = {\n')
        # Pathway order is decided by the instance's own sort key
        for pwy, pairs in sorted(pwy2pubs.items(), key=self._sortby):
            unique_ids = sorted({pmid for pmid, _ in pairs})
            ids_txt = ", ".join(map(str, unique_ids))
            prt.write(row_fmt.format(PW=pwy, PMIDS=ids_txt))
        prt.write('}\n\n')
        prt_copyright_comment(prt)
        print(" {N:5} items WROTE: {PY}".format(N=len(pwy2pubs), PY=fout_py))
    return pwy2pubs
def wrpy_pubmeds(self, fout_py):
    """Write a Python module holding one namedtuple per publication ID.

    The records come from ``self._get_pmid2nt()``. The generated module
    declares an ``Ntlit`` namedtuple using the field names of the first
    record and a ``PMID2NT`` dictionary whose entries are ordered by
    (year, ID).

    Returns the mapping, or None (after printing a notice and without
    writing ``fout_py``) when there are no records.
    """
    pmid2nt = self._get_pmid2nt()
    if not pmid2nt:
        print(" NO items. Not writing {PY}".format(PY=fout_py))
        return
    # All records are assumed to share the fields of the first one
    first_nt = next(iter(pmid2nt.values()))
    field_names = ' '.join(first_nt._fields)

    def _year_then_id(item):
        """Sort key: publication year first, then the ID."""
        return [item[1].year, item[0]]

    with open(fout_py, 'w') as prt:
        prt.write('# coding=utf-8\n')
        prt_docstr_module(
            'Publications including Pubmed papers, Books, and URLs', prt)
        prt.write('from collections import namedtuple\n')
        num_pathways = len(self.pubs['pw2pubs'])
        prt.write('\n# {N} PubMed IDs assc. w/{M} Pathways\n'.format(
            N=len(pmid2nt), M=num_pathways))
        prt.write(
            "Ntlit = namedtuple('ntlit', '{KEYS}')\n".format(KEYS=field_names))
        prt.write(
            '# pylint: disable=line-too-long,too-many-lines,bad-continuation\n')
        prt.write('PMID2NT = {\n')
        row_fmt = ' {PMID:>8} : Ntlit._make({VALS}),\n'
        for pmid, record in sorted(pmid2nt.items(), key=_year_then_id):
            prt.write(row_fmt.format(PMID=pmid, VALS=list(record)))
        prt.write('}\n\n')
        prt_copyright_comment(prt)
        print(" {N:5} items WROTE: {PY}".format(N=len(pmid2nt), PY=fout_py))
    return pmid2nt
def wrpy_gons(self, fout_py, name='GO_BiologicalProcess'):
    """Write a Python module mapping each pathway to a set of GO IDs.

    ``name`` selects the namespace passed to ``self._get_pwy2ns`` and is also
    used in the generated module's docstring and header comment. Each
    pathway's IDs are single-quoted, de-duplicated, sorted and written as a
    set literal under ``PWY2GOS``.

    Returns the mapping, or None (after printing a notice and without
    writing ``fout_py``) when the mapping is empty.
    """
    pwy2ns = self._get_pwy2ns(name)
    if not pwy2ns:
        print(" NO items. Not writing {PY}".format(PY=fout_py))
        return
    with open(fout_py, 'w') as prt:
        title = 'Gene Ontology {NS} for each Pathway'.format(NS=name)
        prt_docstr_module(title, prt)
        header = '\n# {N} of {M} Pathways are associated with {NS}\n'.format(
            N=len(pwy2ns), M=len(self.pw2info), NS=name)
        prt.write(header)
        prt.write(
            '# pylint: disable=line-too-long,too-many-lines,bad-continuation\n')
        prt.write('PWY2GOS = {\n')
        for pwy, goids in sorted(pwy2ns.items(), key=self._sortby):
            # Quote first, then sort: the order is that of the quoted strings
            quoted = {"'{GO}'".format(GO=goid) for goid in goids}
            prt.write(" '{PW}' : {{{GOS}}},\n".format(
                PW=pwy, GOS=", ".join(sorted(quoted))))
        prt.write('}\n\n')
        prt_copyright_comment(prt)
        print(" {N:5} items WROTE: {PY}".format(N=len(pwy2ns), PY=fout_py))
    return pwy2ns
def wrpy_figure(self, fpat_py):
    """Write a Python module holding one figure namedtuple per pathway.

    ``fpat_py`` is a filename pattern; its ``{ABC}`` field is filled with
    ``self.taxnt.abc``. The file is created under ``REPO``, while the printed
    messages show the name relative to it. The generated module declares
    ``Ntfig`` from the fields of the first record and a ``PW2FIGS`` dictionary.

    Returns the mapping from ``self._get_ntfig()``, or None (after printing
    a notice and without writing anything) when it is empty.
    """
    fout_py = fpat_py.format(ABC=self.taxnt.abc)
    pw2ntfig = self._get_ntfig()
    if not pw2ntfig:
        print(" NO items. Not writing {PY}".format(PY=fout_py))
        return
    # All records are assumed to share the fields of the first one
    first_nt = next(iter(pw2ntfig.values()))
    field_names = ' '.join(first_nt._fields)
    with open(os.path.join(REPO, fout_py), 'w') as prt:
        title = '{N} of {T} Pathway have figures'.format(
            N=len(pw2ntfig), T=len(self.pw2info))
        prt_docstr_module(title, prt)
        prt.write('from collections import namedtuple\n')
        prt.write(
            "Ntfig = namedtuple('ntfig', '{KEYS}')\n".format(KEYS=field_names))
        prt.write("PW2FIGS = {\n")
        row_fmt = " '{PWY}' : Ntfig._make({VALS}),\n"
        for pwy, record in sorted(pw2ntfig.items(), key=self._sortby):
            prt.write(row_fmt.format(PWY=pwy, VALS=list(record)))
        prt.write("}\n")
        prt_copyright_comment(prt)
        print(' {N:5} items WROTE: {PY}'.format(N=len(pw2ntfig), PY=fout_py))
    return pw2ntfig
def wrpy_pwy2nt(self, fout_py):
    """Write a Python module listing every pathway as a condensed namedtuple.

    Records come from ``self.get_pwy2nt()``. The generated module imports
    ``namedtuple`` and ``date``, carries two "Keys" comment sections (from
    ``self._get_pwmarkdoc()`` and ``self._get_nsdoc()``), declares ``Nto``
    from the fields of the first record, and lists the records in ``PWYNTS``
    in the mapping's own iteration order.

    Returns the mapping, or None (after printing a notice and without
    writing ``fout_py``) when it is empty.
    """
    pwy2nt = self.get_pwy2nt()
    if not pwy2nt:
        print(" NO items. Not writing {PY}".format(PY=fout_py))
        return
    # All records are assumed to share the fields of the first one
    first_nt = next(iter(pwy2nt.values()))
    field_names = ' '.join(first_nt._fields)
    keys_fmt = '\n# Keys:\n{KEY}\n'
    with open(fout_py, 'w') as prt:
        prt.write('# coding=utf-8\n')
        prt_docstr_module('Pathway information', prt)
        prt.write('from collections import namedtuple\n')
        prt.write('from datetime import date\n')
        prt.write(keys_fmt.format(KEY=self._get_pwmarkdoc()))
        prt.write(keys_fmt.format(KEY=self._get_nsdoc()))
        prt.write(
            "\nNto = namedtuple('ntpwy', '{KEYS}')\n".format(KEYS=field_names))
        prt.write('# {N} {SPECIES} Pathways\n'.format(
            N=len(pwy2nt), SPECIES=self.taxnt.displayName))
        prt.write('# pylint: disable=line-too-long,too-many-lines\n')
        prt.write('PWYNTS = [\n')
        for record in pwy2nt.values():
            # The generated module imports `date` directly, so shorten the
            # qualified name that appears in the values' repr.
            values_txt = str(list(record)).replace('datetime.date', 'date')
            prt.write(' Nto._make({VALS}),\n'.format(VALS=values_txt))
        prt.write(']\n')
        prt_copyright_comment(prt)
        # The reported count is taken from self.pw2info, not from pwy2nt
        print(" {N:5} pathways WROTE: {TXT}".format(
            N=len(self.pw2info), TXT=fout_py))
    return pwy2nt
def wrpy_info(self, fout_py):
    """Write the main Reactome species information into a Python module.

    The file is created under ``REPO``. ``self.dcts`` is handed to
    ``prt_namedtuple`` together with the variable name ``SPECIES`` and the
    four field names listed below. Nothing is returned.
    """
    species_fields = ['abc', 'abbreviation', 'taxId', 'displayName']
    fout_full = os.path.join(REPO, fout_py)
    with open(fout_full, 'w') as prt:
        prt_docstr_module('Species in Reactome', prt)
        prt.write('import collections as cx\n\n')
        prt_namedtuple(self.dcts, 'SPECIES', species_fields, prt)
        prt_copyright_comment(prt)
        print(' WROTE: {PY}'.format(PY=fout_py))
def main():
    """Write the Reactome version found in the downloaded DAG to a Python module.

    The version is obtained with ``get_version(get_gdbdr())`` and stored as
    ``VERSION`` in ``src/reactomepy/data/reactome_version.py`` (a path
    relative to the current working directory). A summary line is printed
    once the module has been written.
    """
    fout_py = 'src/reactomepy/data/reactome_version.py'
    # CYPHER: MATCH (v:DBInfo) RETURN v
    # Query the version BEFORE opening the output file: mode 'w' truncates on
    # open, so a failing query would otherwise wipe a previously written
    # module and leave an empty or partial file behind.
    version = get_version(get_gdbdr())
    with open(fout_py, 'w') as prt:
        prt_docstr_module('Reactome version in DAG', prt)
        prt.write('VERSION = {V}\n'.format(V=version))
        prt_copyright_comment(prt)
        print(' Version {V} WROTE: {PY}\n'.format(PY=fout_py, V=version))
def wrpy_common_names(self, fout_py):
    """Write the species common names into a Python module.

    The mapping from ``self._get_taxid2commonnames()`` is sorted by key and
    handed to ``prt_dict`` under the variable name ``TAXID2NAMES``. Nothing
    is returned.
    """
    taxid2names = self._get_taxid2commonnames()
    # NOTE(review): a single-argument join leaves the path as given; sibling
    # writers join with REPO -- confirm whether that was intended here too.
    fout_full = os.path.join(fout_py)
    with open(fout_full, 'w') as prt:
        prt_docstr_module('Common name for the species in Reactome', prt)
        prt.write('# pylint: disable=line-too-long\n')
        sorted_items = sorted(taxid2names.items())
        prt_dict(sorted_items, 'TAXID2NAMES', afmt='{A}', bfmt=None, prt=prt)
        prt_copyright_comment(prt)
        print(' WROTE: {PY}'.format(PY=fout_py))
def wrpy_referencedatabase_nts(self, fout_py):
    """Write the ReferenceDatabase records into a Python module.

    The items of ``self.referencedatabases`` are ordered by each record's
    ``displayName`` and handed to ``prt_id2nt``. The file is created under
    ``REPO``. Nothing is returned.
    """
    def _by_display_name(item):
        """Sort key: the record's displayName."""
        return item[1].displayName

    id_nt_lst = sorted(self.referencedatabases.items(), key=_by_display_name)
    fout_full = os.path.join(REPO, fout_py)
    with open(fout_full, 'w') as prt:
        prt_docstr_module(
            '{N} ReferenceDatabases in Reactome'.format(N=len(id_nt_lst)), prt)
        prt.write('from collections import namedtuple\n\n')
        prt.write('# pylint: disable=line-too-long\n')
        prt_id2nt(id_nt_lst, prt)
        prt_copyright_comment(prt)
        print(' WROTE: {PY}'.format(PY=fout_py))
def wrpy_disease2fld(self, fout_py, field, varname):
    """Write one field of the disease records into a Python module.

    ``field`` is passed to ``self._get_dis2fldval`` to obtain the
    disease-to-value mapping; ``varname`` is the variable name handed to
    ``prt_dict``. Keys and values are both formatted inside double quotes.
    The file is created under ``REPO``. Nothing is returned.
    """
    dis2val = self._get_dis2fldval(field)
    fout_full = os.path.join(REPO, fout_py)
    with open(fout_full, 'w') as prt:
        title = '{N} of {M} diseases in Reactome have definitions'.format(
            M=self.num_dis, N=len(dis2val))
        prt_docstr_module(title, prt)
        prt.write('# pylint: disable=line-too-long\n')
        sorted_items = sorted(dis2val.items())
        prt_dict(sorted_items, varname, afmt='"{A}"', bfmt='"{B}"', prt=prt)
        prt_copyright_comment(prt)
        print(' WROTE: {PY}'.format(PY=fout_py))
def wrpy_pw2molecules(self, fout_py, database='UniProt'):
    """Write the participating molecules of each pathway into a Python module.

    The mapping comes from ``self.get_pw2molecules(database)``. The generated
    dictionary is named ``PWY2<DATABASE>S`` (database name upper-cased) and
    maps each pathway to a set literal of its sorted, quoted items. Pathways
    are ordered by the integer in the third '-'-separated field of their ID,
    then by the ID itself. The file is created under ``REPO``.

    After writing, a line is printed with the elapsed-time string, the file
    size in whole megabytes (10**6 bytes) and a summary. Nothing is returned.
    """
    pw2molecules = self.get_pw2molecules(database)
    all_items = set()
    for items in pw2molecules.values():
        all_items.update(items)
    # The time string is taken before the file is written
    hms = get_hms(TIC)
    msg = '{N:4} Pathways contain {M:5} items from {DB}'.format(
        N=len(pw2molecules), M=len(all_items), DB=database)

    def _by_pathway_number(item):
        """Sort key: numeric part of the pathway ID, then the full ID."""
        pwy_id = item[0]
        return [int(pwy_id.split('-')[2]), pwy_id]

    with open(os.path.join(REPO, fout_py), 'w') as prt:
        prt_docstr_module(msg, prt)
        prt.write('# pylint: disable=line-too-long, too-many-lines\n')
        prt.write('PWY2{ITEM}S = {{\n'.format(ITEM=database.upper()))
        for pwy, items in sorted(pw2molecules.items(), key=_by_pathway_number):
            prt.write(" '{PWY}':".format(PWY=pwy))
            quoted = ", ".join("'{V}'".format(V=item) for item in sorted(items))
            prt.write("{{{SET}}},\n".format(SET=quoted))
        prt.write('}\n')
        prt_copyright_comment(prt)
    # Size is read once the file has been closed
    num_bytes = os.stat(os.path.join(REPO, fout_py)).st_size
    filesize = int(num_bytes/1000000.0)
    print(" {HMS} {MB} Mbytes {MSG} WROTE: {PY}".format(
        HMS=hms, MB=filesize, MSG=msg, PY=fout_py))
def wrpy_pwy2disease(self, fout_py):
    """Write a Python module mapping pathways to their associated diseases.

    Only entries of ``self.pw2info`` that contain a ``'disease'`` key are
    used, in the mapping's own iteration order. Each pathway's disease value
    is written as-is under ``PWY2DIS``.

    Returns the list of (pathway, diseases) pairs, or None (after printing a
    notice and without writing ``fout_py``) when no pathway has a disease.
    """
    pwy_dis = []
    for pwy, info in self.pw2info.items():
        if 'disease' in info:
            pwy_dis.append((pwy, info['disease']))
    if not pwy_dis:
        print(" NO items. Not writing {PY}".format(PY=fout_py))
        return
    # Count distinct diseases across all of the pathways
    distinct_diseases = set()
    for _, diseases in pwy_dis:
        distinct_diseases.update(diseases)
    num_pwy = len(pwy_dis)
    num_dis = len(distinct_diseases)
    with open(fout_py, 'w') as prt:
        msg = '{N} Pathways have {M} diseases'.format(N=num_pwy, M=num_dis)
        prt_docstr_module(msg, prt)
        prt.write('# pylint: disable=line-too-long\n')
        prt.write('PWY2DIS = {\n')
        row_fmt = " '{PWY}': {DIS},\n"
        for pwy, diseases in pwy_dis:
            prt.write(row_fmt.format(PWY=pwy, DIS=diseases))
        prt.write('}\n')
        prt_copyright_comment(prt)
        print(" {MSG} WROTE: {PY}".format(MSG=msg, PY=fout_py))
    return pwy_dis
def wrpy_relatedspecies(self, fout_py):
    """Write a Python module mapping pathways to their related species.

    The mapping comes from ``self._get_pwy2relatedspecies()``; each value is
    written as-is under ``PWY2TAXIDS``, with pathways ordered by the
    instance's own sort key.

    Returns the mapping, or None (after printing a notice and without
    writing ``fout_py``) when it is empty.
    """
    pwy2taxids = self._get_pwy2relatedspecies()
    if not pwy2taxids:
        print(" NO items. Not writing {PY}".format(PY=fout_py))
        return
    with open(fout_py, 'w') as prt:
        prt_docstr_module('Related species.', prt)
        header = '\n# {N} of {M} Pathways have related species\n'.format(
            N=len(pwy2taxids), M=len(self.pw2info))
        prt.write(header)
        prt.write('PWY2TAXIDS = {\n')
        row_fmt = " '{PW}' : {TAXIDS},\n"
        for pwy, taxids in sorted(pwy2taxids.items(), key=self._sortby):
            prt.write(row_fmt.format(PW=pwy, TAXIDS=taxids))
        prt.write('}\n\n')
        prt_copyright_comment(prt)
        print(" {N:5} items WROTE: {PY}".format(
            N=len(pwy2taxids), PY=fout_py))
    return pwy2taxids
def wrpy_pwy2summation(self, fpat_py):
    """Write a Python module mapping each pathway to its summation.

    ``fpat_py`` is a filename pattern; its ``{ABC}`` field is filled with
    ``self.taxnt.abc``. The file is created under ``REPO``, while the printed
    messages show the name relative to it. Summations come from
    ``self.objqu.get_pwy2summation(self.log)`` and each value is written
    as-is under ``PW2SUMS``.

    Returns the mapping, or None (after printing a notice and without
    writing anything) when it is empty.
    """
    fout_py = fpat_py.format(ABC=self.taxnt.abc)
    pwy2summation = self.objqu.get_pwy2summation(self.log)
    if not pwy2summation:
        print(" NO items. Not writing {PY}".format(PY=fout_py))
        return
    fout_full = os.path.join(REPO, fout_py)
    with open(fout_full, 'w') as prt:
        prt.write('# coding=utf-8\n')
        prt_docstr_module('Summations for pathways', prt)
        prt.write('# pylint: disable=line-too-long,too-many-lines\n')
        prt.write("PW2SUMS = {\n")
        row_fmt = " '{KEY}': {VAL},\n"
        ordered = sorted(pwy2summation.items(), key=self._sortby)
        for pwy, summation in ordered:
            prt.write(row_fmt.format(KEY=pwy, VAL=summation))
        prt.write("}\n")
        prt_copyright_comment(prt)
        print(' {N:5} items WROTE: {PY}'.format(
            N=len(pwy2summation), PY=fout_py))
    return pwy2summation