Exemplo n.º 1
0
 def get_nodes_query(self, query, msg=None):
     """Run a query and wrap the 's' field of every record in a Neo4jNode.

     Args:
         query: Query text handed to ``session.run``; each record is read as ``rec['s']``.
         msg: Optional label for the summary line; the query text is printed when falsy.

     Returns:
         list of Neo4jNode objects, one per record, in the order the records arrive.
     """
     start = timeit.default_timer()
     with self.gdbdr.session() as session:
         nodes = [Neo4jNode(record['s']) for record in session.run(query).records()]
     label = msg or query
     # Summary line: get_hms timing string, node count, and the label
     print('  {HMS} {N:,} {MSG}'.format(HMS=get_hms(start), N=len(nodes), MSG=label))
     return nodes
Exemplo n.º 2
0
 def ses_dbid2nodebasic_src(self, query):
     """Collect the source dbIds returned by a query and register each new one.

     Every record is read through its 'src_dbId' and 'src_schemaClass' fields.

     Args:
         query: Query text handed to ``session.run``.

     Returns:
         tuple (dbid2nodebasic, src_dbids): the dict handed to
         ``self._add_id2nodeb`` for each unseen dbId, and the set of every
         'src_dbId' value seen.
     """
     # Example query shape: MATCH (src{USERVALS}) RETURN DISTINCT ...
     seen_dbids = set()
     id2nodeb = {}
     start = timeit.default_timer()
     progress_pat = '  {HMS} {IDX:7,} {NT} ...'
     with self.gdbdr.session() as session:
         records = session.run(query).records()
         for num, record in enumerate(records):
             dbid = record['src_dbId']
             seen_dbids.add(dbid)
             if dbid not in id2nodeb:
                 self._add_id2nodeb(id2nodeb, dbid, record['src_schemaClass'])
             # Progress line for record 0 and every 100,000th record after it
             if num % 100000 == 0:
                 print(progress_pat.format(HMS=get_hms(start), IDX=num, NT=record))
     print('  HMS: {HMS} {N:6,} dbIds: {Q}'.format(
         HMS=get_hms(start), N=len(id2nodeb), Q=query))
     return id2nodeb, seen_dbids
Exemplo n.º 3
0
 def get_nodes_sch(self, srchstr, msg=None):
     """Match every node with the given label text and wrap each in a Neo4jNode.

     Args:
         srchstr: Text inserted after ``s:`` in ``MATCH (s:...) RETURN s``.
         msg: Optional label for the summary line; ``srchstr`` is printed when falsy.

     Returns:
         list of Neo4jNode objects, one per record, in the order the records arrive.
     """
     start = timeit.default_timer()
     cypher = 'MATCH (s:{SRCHSTR}) RETURN s'.format(SRCHSTR=srchstr)
     with self.gdbdr.session() as session:
         nodes = [Neo4jNode(record['s']) for record in session.run(cypher).records()]
     label = msg or srchstr
     # Summary line: get_hms timing string, node count, and the label
     print('  {HMS} {N:,} {MSG}'.format(HMS=get_hms(start), N=len(nodes), MSG=label))
     return nodes
Exemplo n.º 4
0
 def get_dbid2set(self, qry, prt=sys.stdout):
     """Group the 'val' field of every record under that record's 'dbId' field.

     Args:
         qry: Query text whose records expose 'dbId' and 'val'.
         prt: Stream that receives a one-line summary; nothing is written when falsy.

     Returns:
         Plain dict of dbId -> set of 'val' values.
     """
     # Example: MATCH (s:InstanceEdit)-[r]->(f:Figure) RETURN s.dbId AS dbId, f.dbId AS val
     grouped = cx.defaultdict(set)
     with self.gdbdr.session() as session:
         for record in session.run(qry).records():
             grouped[record['dbId']].add(record['val'])
     if prt:
         summary = '  {HMS} {N:,} dbIds: {Q}\n'.format(
             HMS=get_hms(self.tic),
             N=len(grouped),
             Q=self._shorten_queryprt(qry))
         prt.write(summary)
     # Return a regular dict so a missing key raises rather than being auto-created
     return dict(grouped)
Exemplo n.º 5
0
 def get_dbid2val(self, qry, prt=sys.stdout):
     """Map the 'dbId' field of every record to that record's 'val' field.

     Args:
         qry: Query text whose records expose 'dbId' and 'val'.
         prt: Stream that receives a one-line summary; nothing is written when falsy.

     Returns:
         dict of dbId -> val; when a dbId repeats, the last record wins.
     """
     # Example: 'MATCH (f:Figure) RETURN f.dbId AS dbId, f.url AS val'
     with self.gdbdr.session() as session:
         id2val = {record['dbId']: record['val'] for record in session.run(qry).records()}
     if prt:
         summary = '  {HMS} {N:,} dbIds: {Q}\n'.format(
             HMS=get_hms(self.tic),
             N=len(id2val),
             Q=self._shorten_queryprt(qry))
         prt.write(summary)
     return id2val
Exemplo n.º 6
0
 def get_dbids(self, qry, prt=sys.stdout):
     """Get the set of dbIds returned by a query.

     Args:
         qry: Query text whose records expose a 'dbId' field.
         prt: Stream that receives a progress line for record 0 and every
             10,000th record after it; nothing is written when falsy.

     Returns:
         set of the 'dbId' values seen.
     """
     # Example: 'MATCH (s:Figure) RETURN s.dbId AS dbId'
     dbids = set()
     with self.gdbdr.session() as session:
         for idx, rec in enumerate(session.run(qry).records()):
             dbid = rec['dbId']
             dbids.add(dbid)
             if prt and idx % 10000 == 0:
                 # End each progress record with a newline so successive
                 # records do not run together on a single line
                 prt.write('{HMS} {IDX} {DBID}\n'.format(
                     HMS=get_hms(self.tic), IDX=idx, DBID=dbid))
     return dbids
Exemplo n.º 7
0
 def _addval_src_norel(pat, dbid2node_missing, session):
     """Add parameter values for node IDs that have no relationships.

     Args:
         pat: Query pattern containing a ``{DBID}`` format field.
         dbid2node_missing: dict of dbId -> node object exposing ``objsch.get_dict``.
         session: Session whose ``run(query).records()`` yields records with a 'src' field.

     Returns:
         dict of dbId -> result of ``objsch.get_dict(rec['src'])`` for every
         dbId whose query returned at least one record.
     """
     # Ex: MATCH (src:DatabaseObject{dbId:DBID}) RETURN src
     dbid2dct = {}
     tic = timeit.default_timer()
     # Start from the unformatted pattern so the summary line below can still
     # be printed when there are no dbIds to query
     qry = pat
     for dbid, nodebasic in dbid2node_missing.items():
         qry = pat.format(DBID=str(dbid))
         for rec in session.run(qry).records():
             dbid2dct[dbid] = nodebasic.objsch.get_dict(rec['src'])
     # N is the number of dbIds requested, not the number found; Q is the last query run
     print('  HMS: {HMS} {N:6,} dbIds: {Q}'.format(HMS=get_hms(tic),
                                                   N=len(dbid2node_missing),
                                                   Q=qry))
     return dbid2dct
Exemplo n.º 8
0
 def get_dbid2node(self, dbids, msg='nodes found'):
     """Query each dbId as a DatabaseObject and wrap the result in a Neo4jNode.

     Args:
         dbids: Iterable of dbId values; one query is run per value.
         msg: Label printed at the end of the summary line.

     Returns:
         dict of dbId -> Neo4jNode for every dbId whose query returned a record.
     """
     found = {}
     start = timeit.default_timer()
     pattern = 'MATCH (s:DatabaseObject{{dbId:{DBID}}}) RETURN s'
     with self.gdbdr.session() as session:
         for dbid in dbids:
             records = session.run(pattern.format(DBID=dbid)).records()
             for record in records:
                 # A later record for the same dbId replaces an earlier one
                 found[dbid] = Neo4jNode(record['s'])
     print('FASTISH  {HMS} {N:,} {MSG}'.format(
         HMS=get_hms(start), N=len(found), MSG=msg))
     return found
Exemplo n.º 9
0
 def get_dbid2ntset(self, qry, prt=sys.stdout):
     """Group (dbId, rel) namedtuples under the 'key_dbId' field of each record.

     Args:
         qry: Query text whose records expose 'key_dbId', 'rtyp' and 'val_dbId'.
         prt: Stream that receives a one-line summary; nothing is written when falsy.

     Returns:
         Plain dict of key_dbId -> set of NtIdRel(dbId=val_dbId, rel=rtyp).
     """
     # Ex: MATCH (e:InstanceEdit)-[r]->(f:Figure)
     #     RETURN f.dbId AS key_dbId, type(r) AS rtyp, e.dbId AS val_dbId
     nt_cls = cx.namedtuple('NtIdRel', 'dbId rel')
     grouped = cx.defaultdict(set)
     with self.gdbdr.session() as session:
         for record in session.run(qry).records():
             item = nt_cls(dbId=record['val_dbId'], rel=record['rtyp'])
             grouped[record['key_dbId']].add(item)
     if prt:
         summary = '  {HMS} {N:,} rel-dbIds: {Q}\n'.format(
             HMS=get_hms(self.tic),
             N=len(grouped),
             Q=self._shorten_queryprt(qry))
         prt.write(summary)
     # Return a regular dict so a missing key raises rather than being auto-created
     return dict(grouped)
Exemplo n.º 10
0
 def wrpy_pw2molecules(self, fout_py, database='UniProt'):
     """Write a Python module holding a pathway-to-items dict and report its size.

     Args:
         fout_py: Output filename, joined onto REPO.
         database: Name passed to ``self.get_pw2molecules``; its upper-cased
             form names the variable written to the file.
     """
     pwy2items = self.get_pw2molecules(database)
     all_items = {item for items in pwy2items.values() for item in items}
     # The timing string is captured before the file is written
     hms = get_hms(TIC)
     msg = '{N:4} Pathways contain {M:5} items from {DB}'.format(
         N=len(pwy2items), M=len(all_items), DB=database)
     fout_full = os.path.join(REPO, fout_py)

     def _sortkey(pwy_items):
         """Order by the integer in the third '-'-separated field, then by the whole key."""
         pwy = pwy_items[0]
         return [int(pwy.split('-')[2]), pwy]

     with open(fout_full, 'w') as prt:
         prt_docstr_module(msg, prt)
         prt.write('# pylint: disable=line-too-long, too-many-lines\n')
         prt.write('PWY2{ITEM}S = {{\n'.format(ITEM=database.upper()))
         for pwy, items in sorted(pwy2items.items(), key=_sortkey):
             prt.write("    '{PWY}':".format(PWY=pwy))
             quoted = ", ".join("'{V}'".format(V=item) for item in sorted(items))
             prt.write("{{{SET}}},\n".format(SET=quoted))
         prt.write('}\n')
         prt_copyright_comment(prt)
     # File size in whole megabytes (bytes / 1,000,000, truncated)
     mbytes = int(os.stat(fout_full).st_size/1000000.0)
     print("  {HMS} {MB} Mbytes {MSG} WROTE: {PY}".format(
         HMS=hms, MB=mbytes, MSG=msg, PY=fout_py))
Exemplo n.º 11
0
 def get_dbid2ntnodes(self, qry, prt=sys.stdout):
     """Map each destination dbId to its source/relationship/destination tuples.

     Args:
         qry: Query text whose records expose 's', 'rtyp' and 'd'; the 'd'
             value is indexed with 'dbId'.
         prt: Stream that receives a one-line summary; nothing is written when falsy.

     Returns:
         Plain dict of destination dbId -> dict keyed by NtIdRel(dbId, rel)
         whose values are NtSRD(src, rel, dst), with src and dst wrapped in
         Neo4jNode.
     """
     # Ex: MATCH (s:InstanceEdit)-[r]->(d:Figure) RETURN s, type(r) AS rtyp, d
     key_cls = cx.namedtuple('NtIdRel', 'dbId rel')
     srd_cls = cx.namedtuple('NtSRD', 'src rel dst')
     by_dst = cx.defaultdict(dict)
     with self.gdbdr.session() as session:
         for record in session.run(qry).records():
             src_raw, rtyp, dst_raw = record['s'], record['rtyp'], record['d']
             key = key_cls(dbId=dst_raw['dbId'], rel=rtyp)
             srd = srd_cls(src=Neo4jNode(src_raw), rel=rtyp, dst=Neo4jNode(dst_raw))
             # A later record with the same destination and relationship replaces the earlier one
             by_dst[dst_raw['dbId']][key] = srd
     if prt:
         summary = '  {HMS} {N:,} rel-dbIds: {Q}\n'.format(
             HMS=get_hms(self.tic),
             N=len(by_dst),
             Q=self._shorten_queryprt(qry))
         prt.write(summary)
     # Return a regular dict so a missing key raises rather than being auto-created
     return dict(by_dst)
Exemplo n.º 12
0
 def _addval_src_rel_dst(self, pat, dbid2nodebasic, session):
     """Get dict w/parameter values and relationships w/their destination dbIds.

     Args:
         pat: Query pattern; every occurrence of the text 'ID' is replaced
             with the dbId being queried.
         dbid2nodebasic: dict of dbId -> node object exposing
             ``objsch.get_dict`` and a ``relationship`` mapping of sets.
         session: Session whose ``run(query).records()`` yields records with
             's', 'r' and 'd_Id' fields.

     Returns:
         dict of dbId -> result of ``objsch.get_dict(rec['s'])`` for every
         dbId whose query returned at least one record.
     """
     # Ex: MATCH (s:DatabaseObject{dbId:ID})-[r]->(d) RETURN s, r, d.dbId AS d_Id
     dbid2dct = {}
     tic = timeit.default_timer()
     # Start from the raw pattern so the summary line below can still be
     # printed when there are no dbIds to query
     qry = pat
     for dbid, nodebasic in dbid2nodebasic.items():
         qry = pat.replace('ID', str(dbid))
         for rec in session.run(qry).records():
             dbid2dct[dbid] = nodebasic.objsch.get_dict(rec['s'])
             rel = rec['r'].type
             dstid = rec['d_Id']
             # Link only to destinations that are in the node set and whose
             # relationship type is not excluded
             if dstid in dbid2nodebasic and rel not in self.excl_rel:
                 nodebasic.relationship[rel].add(dbid2nodebasic[dstid])
     # Q is the last query run (or the raw pattern when none was run)
     print('  HMS: {HMS} {N:6,} dbIds: {Q}'.format(HMS=get_hms(tic),
                                                   N=len(dbid2dct),
                                                   Q=qry))
     return dbid2dct