示例#1
0
# Number of cross-validation folds (applicable for average CV).
nfolds = argData['crossval_folds']

# Node classes, one per metapath feature set.
nodes = [ProteinInteractionNode, KeggNode, ReactomeNode, GoNode, InterproNode]

# The static feature set names are supplied as one comma-separated string.
staticFeatures = argData['static_data'].split(',')
logging.info(staticFeatures)

logging.info("--- USING {0} METAPATH FEATURE SETS".format(len(nodes)))
logging.info("--- USING {0} STATIC FEATURE SETS".format(len(staticFeatures)))

# Look up pathway-id descriptions and protein names/symbols in the database.
dbAdapter = OlegDB()
idDescription = dbAdapter.fetchPathwayIdDescription()
idNameSymbol = dbAdapter.fetchSymbolForProteinId()

# Pre-loaded label lists are forwarded only when they exist; otherwise
# metapathFeatures is called without the loadedLists keyword at all.
# NOTE(review): fileData looks like a dict keyed by True/False holding the
# positive/negative labels -- confirm against the caller.
featureKwargs = {} if fileData is None else {'loadedLists': fileData}

# Build the training table; missing feature values are replaced with 0.
trainData = metapathFeatures(disease, currentGraph, nodes, idDescription,
                             staticFeatures, **featureKwargs).fillna(0)
'''
# directory and file name for the ML Model
示例#2
0
        if isinstance(n, str) and n[0:3] == "GO:"
    ]
    logging.info("GO nodes: %d" % (len(goNodes)))
    # InterPro entries: string-typed graph nodes whose id begins with "IPR".
    interNodes = [
        node for node in pdg.graph.nodes
        if isinstance(node, str) and node.startswith("IPR")
    ]
    logging.info("INTERPRO nodes: %d" % len(interNodes))
    # Pfam / PROSITE node counting is currently disabled:
    # pfamNodes = [n for n in list(pdg.graph.nodes) if isinstance(n, str) and n[0:2]=="PF"]
    # logging.info("Pfam nodes: %d"%(len(pfamNodes)))
    # prositeNodes = [n for n in list(pdg.graph.nodes) if isinstance(n, str) and n[0:2]=="PS"]
    # logging.info("PROSITE nodes: %d"%(len(prositeNodes)))

    # Load the lookup tables used to annotate nodes/edges from the db.
    idDescription = dbad.fetchPathwayIdDescription()
    idSymbol = dbad.fetchSymbolForProteinId()

    # The UniProt lookup is best-effort: start from an empty dict and keep it
    # if the fetch fails, logging the error instead of aborting.
    idUniprot = {}
    try:
        idUniprot = dbad.fetchUniprotForProteinId()
    except Exception as err:
        logging.error("No Uniprot in OlegDB: {0}".format(err))

    # add name, symbol and uniprot id to graph nodes
    for n in pdg.graph.nodes:
        if n in idUniprot:
            pdg.graph.nodes[n]['UniprotId'] = idUniprot[n]
        else:
            pdg.graph.nodes[n]['UniprotId'] = ''
        if n in idSymbol:
            pdg.graph.nodes[n]['Symbol'] = idSymbol[n]
        else: