def get_nodes_query(self, query, msg=None):
    """Run a Cypher query and wrap every returned ``s`` value in a Neo4jNode.

    Args:
        query: Cypher text; each result record must expose a field named ``s``.
        msg: Optional label for the summary line; the query text is printed
            when it is omitted or empty.

    Returns:
        A list with one Neo4jNode per record, in result order.
    """
    start = timeit.default_timer()
    with self.gdbdr.session() as session:
        nodes = [Neo4jNode(record['s']) for record in session.run(query).records()]
    label = msg if msg else query
    print(' {HMS} {N:,} {MSG}'.format(HMS=get_hms(start), N=len(nodes), MSG=label))
    return nodes
def ses_dbid2nodebasic_src(self, query):
    """Collect per-dbId basic node entries and the set of source dbIds for a query.

    Args:
        query: Cypher text whose records expose ``src_dbId`` and
            ``src_schemaClass`` fields,
            e.g. MATCH (src{USERVALS}) RETURN DISTINCT ...

    Returns:
        A tuple ``(dbid2nodebasic, src_dbids)``: the dict filled through
        ``self._add_id2nodeb`` and the set of every ``src_dbId`` seen.
    """
    progress_fmt = ' {HMS} {IDX:7,} {NT} ...'
    summary_fmt = ' HMS: {HMS} {N:6,} dbIds: {Q}'
    id2basic = {}
    seen_ids = set()
    start = timeit.default_timer()
    with self.gdbdr.session() as session:
        for count, record in enumerate(session.run(query).records()):
            dbid = record['src_dbId']
            seen_ids.add(dbid)
            # Register each dbId only once.
            # NOTE(review): assumes _add_id2nodeb stores the entry under dbid -- confirm.
            if dbid not in id2basic:
                self._add_id2nodeb(id2basic, dbid, record['src_schemaClass'])
            # Progress line on the first record and every 100,000 records after that.
            if count % 100000 == 0:
                print(progress_fmt.format(HMS=get_hms(start), IDX=count, NT=record))
    print(summary_fmt.format(HMS=get_hms(start), N=len(id2basic), Q=query))
    return id2basic, seen_ids
def get_nodes_sch(self, srchstr, msg=None):
    """Fetch every node matching ``(s:<srchstr>)`` and wrap each in a Neo4jNode.

    Args:
        srchstr: Text placed after ``s:`` in the MATCH pattern.
        msg: Optional label for the summary line; ``srchstr`` is printed
            when it is omitted or empty.

    Returns:
        A list with one Neo4jNode per record, in result order.
    """
    start = timeit.default_timer()
    cypher = 'MATCH (s:{SRCHSTR}) RETURN s'.format(SRCHSTR=srchstr)
    with self.gdbdr.session() as session:
        nodes = [Neo4jNode(record['s']) for record in session.run(cypher).records()]
    label = msg if msg else srchstr
    print(' {HMS} {N:,} {MSG}'.format(HMS=get_hms(start), N=len(nodes), MSG=label))
    return nodes
def get_dbid2set(self, qry, prt=sys.stdout):
    """Group the ``val`` field of every record into a set per ``dbId`` field.

    Args:
        qry: Cypher text whose records expose ``dbId`` and ``val`` fields, e.g.
            MATCH (s:InstanceEdit)-[r]->(f:Figure) RETURN s.dbId AS dbId, f.dbId AS val
        prt: Stream that receives a one-line summary; a falsy value
            suppresses the summary.

    Returns:
        A plain dict mapping each dbId to the set of values seen for it.
    """
    grouped = cx.defaultdict(set)
    with self.gdbdr.session() as session:
        for record in session.run(qry).records():
            grouped[record['dbId']].add(record['val'])
    if prt:
        summary = ' {HMS} {N:,} dbIds: {Q}\n'.format(
            HMS=get_hms(self.tic), N=len(grouped), Q=self._shorten_queryprt(qry))
        prt.write(summary)
    # Hand back an ordinary dict so lookups of unknown keys do not create entries.
    return dict(grouped)
def get_dbid2val(self, qry, prt=sys.stdout):
    """Map the ``dbId`` field of every record to that record's ``val`` field.

    Args:
        qry: Cypher text whose records expose ``dbId`` and ``val`` fields, e.g.
            'MATCH (f:Figure) RETURN f.dbId AS dbId, f.url AS val'
        prt: Stream that receives a one-line summary; a falsy value
            suppresses the summary.

    Returns:
        A dict of dbId -> val. When a dbId occurs more than once, the value
        from the last record wins.
    """
    with self.gdbdr.session() as session:
        id2value = {record['dbId']: record['val'] for record in session.run(qry).records()}
    if prt:
        summary = ' {HMS} {N:,} dbIds: {Q}\n'.format(
            HMS=get_hms(self.tic), N=len(id2value), Q=self._shorten_queryprt(qry))
        prt.write(summary)
    return id2value
def get_dbids(self, qry, prt=sys.stdout):
    """Return the set of ``dbId`` values produced by a query.

    Args:
        qry: Cypher text whose records expose a ``dbId`` field, e.g.
            'MATCH (s:Figure) RETURN s.dbId AS dbId'
        prt: Stream that receives a progress line for the first record and
            every 10,000th record after it; a falsy value disables progress
            output.

    Returns:
        A set holding every distinct dbId in the result.
    """
    dbids = set()
    with self.gdbdr.session() as session:
        for idx, rec in enumerate(session.run(qry).records()):
            dbid = rec['dbId']
            dbids.add(dbid)
            if prt and idx % 10000 == 0:
                # Terminate each progress entry with a newline so successive
                # entries do not run together on one line.
                prt.write('{HMS} {IDX} {DBID}\n'.format(
                    HMS=get_hms(self.tic), IDX=idx, DBID=dbid))
    return dbids
def _addval_src_norel(pat, dbid2node_missing, session):
    """Add parameter values for node IDs that have no relationships.

    Runs one query per dbId and stores the dict built by that node's
    ``objsch.get_dict`` from the returned ``src`` value.

    NOTE(review): the signature has no ``self``; presumably this is used as a
    static method -- any decorator is outside the visible source.

    Args:
        pat: ``str.format`` pattern with a ``DBID`` placeholder; after
            substitution the query looks like
            MATCH (src:DatabaseObject{dbId:DBID}) RETURN src
        dbid2node_missing: Dict of dbId -> node object exposing ``objsch``.
        session: Open database session providing ``run(query).records()``.

    Returns:
        A dict of dbId -> value dict, holding only the dbIds whose query
        returned at least one record.
    """
    dbid2dct = {}
    tic = timeit.default_timer()
    # Pre-bind the query text: with an empty input the loop never runs, and the
    # summary line below would otherwise raise UnboundLocalError.
    qry = pat
    for dbid, nodebasic in dbid2node_missing.items():
        qry = pat.format(DBID=str(dbid))
        for rec in session.run(qry).records():
            dbid2dct[dbid] = nodebasic.objsch.get_dict(rec['src'])
    # The summary shows the number of dbIds requested and the last query that ran.
    print(' HMS: {HMS} {N:6,} dbIds: {Q}'.format(
        HMS=get_hms(tic), N=len(dbid2node_missing), Q=qry))
    return dbid2dct
def get_dbid2node(self, dbids, msg='nodes found'):
    """Look up DatabaseObject nodes one dbId at a time and wrap them in Neo4jNode.

    Args:
        dbids: Iterable of dbId values; each is substituted into its own query.
        msg: Label appended to the summary line.

    Returns:
        A dict of dbId -> Neo4jNode, holding only the dbIds whose query
        returned at least one record. If a query returns several records,
        the last one is kept.
    """
    template = 'MATCH (s:DatabaseObject{{dbId:{DBID}}}) RETURN s'
    found = {}
    start = timeit.default_timer()
    with self.gdbdr.session() as session:
        for dbid in dbids:
            cypher = template.format(DBID=dbid)
            for record in session.run(cypher).records():
                found[dbid] = Neo4jNode(record['s'])
    print('FASTISH {HMS} {N:,} {MSG}'.format(HMS=get_hms(start), N=len(found), MSG=msg))
    return found
def get_dbid2ntset(self, qry, prt=sys.stdout):
    """Group ``(dbId, rel)`` namedtuples into a set per ``key_dbId``.

    Args:
        qry: Cypher text whose records expose ``key_dbId``, ``rtyp`` and
            ``val_dbId`` fields, e.g.
            MATCH (e:InstanceEdit)-[r]->(f:Figure)
            RETURN f.dbId AS key_dbId, type(r) AS rtyp, e.dbId AS val_dbId
        prt: Stream that receives a one-line summary; a falsy value
            suppresses the summary.

    Returns:
        A plain dict mapping each key_dbId to a set of ``NtIdRel`` namedtuples
        with ``dbId`` taken from ``val_dbId`` and ``rel`` taken from ``rtyp``.
    """
    nt_cls = cx.namedtuple('NtIdRel', 'dbId rel')
    grouped = cx.defaultdict(set)
    with self.gdbdr.session() as session:
        for record in session.run(qry).records():
            item = nt_cls(dbId=record['val_dbId'], rel=record['rtyp'])
            grouped[record['key_dbId']].add(item)
    if prt:
        summary = ' {HMS} {N:,} rel-dbIds: {Q}\n'.format(
            HMS=get_hms(self.tic), N=len(grouped), Q=self._shorten_queryprt(qry))
        prt.write(summary)
    # Hand back an ordinary dict so lookups of unknown keys do not create entries.
    return dict(grouped)
def wrpy_pw2molecules(self, fout_py, database='UniProt'):
    """Write a Python module holding the participating molecules of each pathway.

    The module defines one dict named ``PWY2<DATABASE>S`` mapping a pathway ID
    to a set literal of its sorted items. Pathways are ordered by the integer
    in the third '-'-separated field of the ID, then by the full ID.

    Args:
        fout_py: Output file name, joined onto ``REPO``.
        database: Passed to ``self.get_pw2molecules``; its upper-cased form
            names the dict that is written.
    """
    pw2molecules = self.get_pw2molecules(database)
    all_items = set()
    for items in pw2molecules.values():
        all_items.update(items)
    # Elapsed time is sampled before the file is written.
    hms = get_hms(TIC)
    msg = '{N:4} Pathways contain {M:5} items from {DB}'.format(
        N=len(pw2molecules), M=len(all_items), DB=database)
    path_py = os.path.join(REPO, fout_py)

    def _sortkey(pwy_items):
        """Order by the numeric third field of the pathway ID, then the ID."""
        pwy = pwy_items[0]
        return [int(pwy.split('-')[2]), pwy]

    with open(path_py, 'w') as prt:
        prt_docstr_module(msg, prt)
        prt.write('# pylint: disable=line-too-long, too-many-lines\n')
        prt.write('PWY2{ITEM}S = {{\n'.format(ITEM=database.upper()))
        for pwy, pwy_items in sorted(pw2molecules.items(), key=_sortkey):
            quoted = ", ".join("'{V}'".format(V=item) for item in sorted(pwy_items))
            prt.write(" '{PWY}':".format(PWY=pwy))
            prt.write("{{{SET}}},\n".format(SET=quoted))
        prt.write('}\n')
        prt_copyright_comment(prt)
    # File size in whole megabytes (10**6 bytes), truncated.
    filesize = int(os.stat(path_py).st_size/1000000.0)
    print(" {HMS} {MB} Mbytes {MSG} WROTE: {PY}".format(
        HMS=hms, MB=filesize, MSG=msg, PY=fout_py))
def get_dbid2ntnodes(self, qry, prt=sys.stdout):
    """Collect (src, rel, dst) node triples, grouped by the destination's dbId.

    Args:
        qry: Cypher text whose records expose ``s``, ``rtyp`` and ``d`` fields,
            e.g. MATCH (s:InstanceEdit)-[r]->(d:Figure) RETURN s, type(r) AS rtyp, d
        prt: Stream that receives a one-line summary; a falsy value
            suppresses the summary.

    Returns:
        A plain dict keyed by ``d['dbId']``. Each value is a dict mapping an
        ``NtIdRel(dbId, rel)`` key (destination dbId and relationship type) to
        an ``NtSRD(src, rel, dst)`` tuple whose ``src`` and ``dst`` are
        Neo4jNode objects. A later record with the same key replaces the
        earlier one.
    """
    key_cls = cx.namedtuple('NtIdRel', 'dbId rel')
    triple_cls = cx.namedtuple('NtSRD', 'src rel dst')
    by_dst = cx.defaultdict(dict)
    with self.gdbdr.session() as session:
        for record in session.run(qry).records():
            src_raw = record['s']
            rel = record['rtyp']
            dst_raw = record['d']
            dst_id = dst_raw['dbId']
            key = key_cls(dbId=dst_id, rel=rel)
            triple = triple_cls(src=Neo4jNode(src_raw), rel=rel, dst=Neo4jNode(dst_raw))
            by_dst[dst_id][key] = triple
    if prt:
        summary = ' {HMS} {N:,} rel-dbIds: {Q}\n'.format(
            HMS=get_hms(self.tic), N=len(by_dst), Q=self._shorten_queryprt(qry))
        prt.write(summary)
    # Hand back an ordinary dict so lookups of unknown keys do not create entries.
    return dict(by_dst)
def _addval_src_rel_dst(self, pat, dbid2nodebasic, session):
    """Get dict w/parameter values and relationships w/their destination dbIds.

    Runs one query per dbId. For every returned record, the source node's
    value dict is stored, and the destination node is added to the source's
    ``relationship[rel]`` set when the destination is itself a key of
    ``dbid2nodebasic`` and the relationship type is not in ``self.excl_rel``.

    Args:
        pat: Query text in which every literal ``ID`` substring is replaced by
            the dbId, e.g.
            MATCH (s:DatabaseObject{dbId:ID})-[r]->(d) RETURN s, r, d.dbId AS d_Id
        dbid2nodebasic: Dict of dbId -> node object exposing ``objsch`` and
            ``relationship``; the node objects are mutated in place.
        session: Open database session providing ``run(query).records()``.

    Returns:
        A dict of dbId -> value dict, holding only the dbIds whose query
        returned at least one record.
    """
    dbid2dct = {}
    tic = timeit.default_timer()
    # Pre-bind the query text: with an empty input the loop never runs, and the
    # summary line below would otherwise raise UnboundLocalError.
    qry = pat
    for dbid, nodebasic in dbid2nodebasic.items():
        # Plain substring substitution: any other 'ID' in the pattern is replaced too.
        qry = pat.replace('ID', str(dbid))
        for rec in session.run(qry).records():
            dbid2dct[dbid] = nodebasic.objsch.get_dict(rec['s'])
            rel = rec['r'].type
            dstid = rec['d_Id']
            # Keep only edges that stay inside the known node set and are not excluded.
            if dstid in dbid2nodebasic and rel not in self.excl_rel:
                nodebasic.relationship[rel].add(dbid2nodebasic[dstid])
    # The summary shows the number of dbIds that had records and the last query that ran.
    print(' HMS: {HMS} {N:6,} dbIds: {Q}'.format(
        HMS=get_hms(tic), N=len(dbid2dct), Q=qry))
    return dbid2dct