# Set-up that builds `trainData` via metapathFeatures (whitespace-collapsed
# section).
#
# NOTE(review): this section appears here as one physical line. If it really
# is stored that way, Python treats everything after the first '#' as comment
# text, so only the `nfolds` assignment executes -- restore the line breaks
# before relying on the rest. TODO confirm against the on-disk file.
#
# Read in order, the flattened text spells out these steps:
#   1. nfolds <- argData['crossval_folds'] (annotated "applicable for
#      average CV").
#   2. nodes <- the five node classes ProteinInteractionNode, KeggNode,
#      ReactomeNode, GoNode and InterproNode.
#   3. staticFeatures <- argData['static_data'] split on ','; the list itself
#      and the sizes of `nodes` and `staticFeatures` are logged at INFO level.
#   4. An OlegDB() adapter supplies idDescription
#      (fetchPathwayIdDescription) and idNameSymbol
#      (fetchSymbolForProteinId).
#   5. trainData <- metapathFeatures(disease, currentGraph, nodes,
#      idDescription, staticFeatures[, loadedLists=fileData]).fillna(0);
#      loadedLists is passed only when fileData is not None.
#
# NOTE(review): idNameSymbol is assigned but not used within this span.
# NOTE(review): the text ends with a ''' delimiter whose partner is not
# visible here, so this section may sit inside (or open) a triple-quoted
# block -- presumably disabled code; verify before re-enabling any of it.
nfolds = argData['crossval_folds'] # applicable for average CV #Nodes nodes = [ProteinInteractionNode, KeggNode, ReactomeNode, GoNode, InterproNode] #staticFeatures = [] staticFeatures = argData['static_data'].split(',') logging.info(staticFeatures) logging.info("--- USING {0} METAPATH FEATURE SETS".format(len(nodes))) logging.info("--- USING {0} STATIC FEATURE SETS".format(len(staticFeatures))) #fetch the description of proteins and pathway_ids dbAdapter = OlegDB() idDescription = dbAdapter.fetchPathwayIdDescription() #fetch the description idNameSymbol = dbAdapter.fetchSymbolForProteinId( ) #fetch name and symbol for protein if fileData is not None: #logging.info("FOUND {0} POSITIVE LABELS".format(len(fileData[True]))) #logging.info("FOUND {0} NEGATIVE LABELS".format(len(fileData[False]))) trainData = metapathFeatures(disease, currentGraph, nodes, idDescription, staticFeatures, loadedLists=fileData).fillna(0) else: trainData = metapathFeatures(disease, currentGraph, nodes, idDescription, staticFeatures).fillna(0) ''' # directory and file name for the ML Model
# Node census and annotation of pdg.graph (whitespace-collapsed fragment).
#
# NOTE(review): the text below starts in the middle of a list comprehension
# (its head is not visible here) and stops at an `else:` whose body is not
# visible either; it also appears as one physical line. If it is stored that
# way it is not valid Python -- restore the line breaks. TODO confirm against
# the on-disk file.
#
# Read in order, the visible text does the following:
#   * Finishes a filter that keeps string nodes whose first three characters
#     are "GO:", then logs len(goNodes).
#   * Builds interNodes from pdg.graph.nodes, keeping string nodes whose
#     first three characters are "IPR", and logs the count.
#   * Carries commented-out equivalents for "PF" (Pfam) and "PS" (PROSITE)
#     prefixes.
#   * Fetches idDescription and idSymbol from dbad. idUniprot is fetched
#     inside a try; on any Exception the error is logged ("No Uniprot in
#     OlegDB") and idUniprot falls back to an empty dict.
#   * For every node n in pdg.graph.nodes, sets the 'UniprotId' attribute to
#     idUniprot[n], or '' when n has no entry, and sets 'Symbol' to
#     idSymbol[n] when n has an entry (the fallback branch is cut off).
#
# NOTE(review): idDescription is assigned but not used within this span.
# NOTE(review): `except Exception` also hides failures unrelated to a
# missing Uniprot table -- presumably deliberate best-effort; verify.
if isinstance(n, str) and n[0:3] == "GO:" ] logging.info("GO nodes: %d" % (len(goNodes))) interNodes = [ n for n in list(pdg.graph.nodes) if isinstance(n, str) and n[0:3] == "IPR" ] logging.info("INTERPRO nodes: %d" % (len(interNodes))) # pfamNodes = [n for n in list(pdg.graph.nodes) if isinstance(n, str) and n[0:2]=="PF"] # logging.info("Pfam nodes: %d"%(len(pfamNodes))) # prositeNodes = [n for n in list(pdg.graph.nodes) if isinstance(n, str) and n[0:2]=="PS"] # logging.info("PROSITE nodes: %d"%(len(prositeNodes))) # Fetch node/edge information from db. idDescription = dbad.fetchPathwayIdDescription() idSymbol = dbad.fetchSymbolForProteinId() try: idUniprot = dbad.fetchUniprotForProteinId() except Exception as e: logging.error("No Uniprot in OlegDB: {0}".format(e)) idUniprot = {} # add name, symbol and uniprot id to graph nodes for n in pdg.graph.nodes: if n in idUniprot: pdg.graph.nodes[n]['UniprotId'] = idUniprot[n] else: pdg.graph.nodes[n]['UniprotId'] = '' if n in idSymbol: pdg.graph.nodes[n]['Symbol'] = idSymbol[n] else: