iplMatrix = None global verbose, globalList, globalPathway for o, a in opts: if o == "-q": verbose = False elif o == "-i": iplMatrix = a ## build sourceList typeFile = "/".join(re.split("/", sifFile)[:-2] + ["TYPE.NA"]) sourceList = re.split("/", sifFile)[-2] + ".list_t" h = mData.rList(featureFile) (n, i) = mPathway.rSIF(sifFile, typef=typeFile) (gn, gi) = mPathway.rPathway(globalPathway) p = mPathway.Pathway(n, i) s = mPathway.sortConnected(p) f = open(sourceList, "w") c = 1 for i in s: u = list(set(i) & set(h)) if len(u) >= 5: f.write("component_%s\t%s\n" % (c, "\t".join(u))) c += 1 break f.write("component_all\t%s\n" % ("\t".join(list(set(n.keys()) & set(h))))) f.write("all\t%s\n" % ("\t".join(list(set(gn.keys()) & set(h))))) f.close() ## build overlapList
try: opts, args = getopt.getopt(args, "o:q") except getopt.GetoptError, err: print str(err) usage(2) if len(args) != 1: log("ERROR: incorrect number of arguments", die=True) inf = args[0] outf = None global verbose for o, a in opts: if o == "-o": outf = a elif o == "-q": verbose = False ## execute (n, i) = mPathway.rPathway(inf) p = mPathway.Pathway(n, i) p.selfTest() if outf != None: mPathway.wPathway(outf, p.nodes, p.interactions) if __name__ == "__main__": main(sys.argv[1:])
parser.add_option(
    "--flattened",
    type="string",
    dest="flattened",
    action="store",
    help="Join genes with all pathway links for complexes and families they belong to. Print that network with just proteins"
)
(options, args) = parser.parse_args()

# Forward read of the pathway file:
#   nodes:        name -> type
#   Interactions: name -> interacting nodes
#   Proteins:     extra value returned because retProteins=True
nodes, Interactions, Proteins = mPathway.rPathway(options.pathway_file,
                                                  reverse=False,
                                                  retProteins=True)

# Same file read again with reverse=True; this reversed view is what
# getComponentMap consumes below.
rev_nodes, revInteractions = mPathway.rPathway(options.pathway_file,
                                               reverse=True,
                                               retProteins=False)

# maps complex strings to the components in each
componentMap = mPathway.getComponentMap(rev_nodes, revInteractions)

# Raw strings keep "\(" a regex escape instead of an (invalid) string escape;
# the compiled patterns are unchanged.
complexRE = re.compile(r".*\((complex|family)\).*")
abstractRE = re.compile(r".*\(abstract\).*")

# print out a 2-column interactions file of simple PPIs
# protein -> protein
ppi_edges = {}
tf_edges = {}
## parse arguments try: opts, args = getopt.getopt(args, "o:q") except getopt.GetoptError, err: print str(err) usage(2) if len(args) != 1: log("ERROR: incorrect number of arguments", die = True) inf = args[0] outf = None global verbose for o, a in opts: if o == "-o": outf = a elif o == "-q": verbose = False ## execute (n, i) = mPathway.rPathway(inf) p = mPathway.Pathway(n, i) p.selfTest() if outf != None: mPathway.wPathway(outf, p.nodes, p.interactions) if __name__ == "__main__": main(sys.argv[1:])
# Date: 1-29-12
# takes a sets file from expand.pl and follows interactions on non-leaf nodes
# to get an expanded set. Must specify a regex key for non-leaf nodes on input
# (default is "(abstract)")
#
# The current use for this is to find the transitive neighbors of abstract
# concepts in the superpathway

import re, sys, mPathway
from optparse import OptionParser

parser = OptionParser()
parser.add_option("-r", "--non_leaf", type="string", dest="non_leaf_node",
                  action="store", help="Non leaf regex string key",
                  default=None)
parser.add_option("-p", "--pathway_file", type="string", dest="pathway_file",
                  action="store", help="superpathway file")
(options, args) = parser.parse_args()

# Read the pathway with reverse=True; this reversed view is what
# getComponentMap consumes below.
rev_nodes, revInteractions = mPathway.rPathway(options.pathway_file,
                                               reverse=True,
                                               retProteins=False)

# maps complex strings to the components in each
componentMap = mPathway.getComponentMap(rev_nodes, revInteractions)

space2under = re.compile(' ')
under2space = re.compile('_')

# Raw strings keep "\(" a regex escape instead of an (invalid) string escape;
# the compiled patterns are unchanged.
abstractRE = re.compile(r".*\(abstract\).*")
complexRE = re.compile(r".*\((complex|family)\).*")

# Without -r/--non_leaf, non-leaf nodes are the "(abstract)" ones.
if options.non_leaf_node is None:
    nonLeafRE = abstractRE
else:
    nonLeafRE = re.compile(options.non_leaf_node)

# get the constituents of a complex
prefix = args[0] inputArguments = args[1:] global verbose for o, a in opts: if o == "-q": verbose = False ## execute inputPathways = [] for element in inputArguments: if os.path.isdir(element): for file in os.listdir(element): if file.endswith("pathway.tab"): inputPathways.append(file) elif element.endswith("pathway.tab"): inputPathways.append(element) ## append pathways outPathway = mPathway.Pathway({}, {}) for file in inputPathways: (nodes, interactions) = mPathway.rPathway(file) appendPathway = mPathway.Pathway(nodes, interactions) outPathway = mPathway.combinePathways(outPathway, appendPathway) ## write pathways mPathway.wPathway(prefix, outPathway.nodes, outPathway.interactions) if __name__ == "__main__": main(sys.argv[1:])
def _thresholdLevel(value, mean, std, bounds):
    """Grade a score against the two mean/std thresholds.

    Returns 2 when value exceeds mean + bounds[1]*std, 1 when it only
    exceeds mean + bounds[0]*std, and 0 otherwise.
    """
    if value > mean + bounds[1]*std:
        return 2
    if value > mean + bounds[0]*std:
        return 1
    return 0


def filterNet(files, phenotypes = None, statLine = None, outDir = None):
    """Filter the global pathway down to high-scoring links, per phenotype.

    files      -- score files read with mData.rCRSData; files[0] is the
                  primary file (drives the phenotype list, NA checks, the
                  "MAIN" rule and the disconnected-node selection).
    phenotypes -- optional collection of phenotype names to restrict to;
                  None or empty means every phenotype found in files[0].
    statLine   -- optional "mean;std,mean;std,..." string (one pair per
                  file) used instead of mCalculate.mean_std of the scores.
    outDir     -- output directory; defaults to one directory per phenotype.

    Reads the module-level settings globalPathway, outputAttributes,
    includeType, topDisconnected, outputPARADIGM, featureReq and
    filterBounds.

    Side effects: writes attribute / SIF / pathway files.  When
    outputPARADIGM is set and no connected component has enough proteins,
    the global filterBounds is lowered in place, the whole filter is re-run
    recursively, and the process then exits via sys.exit(0).
    """
    global filterBounds
    if phenotypes is None:
        phenotypes = []
    filterString = "%s_%s" % (filterBounds[0], filterBounds[1])
    nFiles = len(files)

    ## read global pathway
    (gNodes, gInteractions) = mPathway.rPathway(globalPathway)

    ## read drugs
    #drugData = mData.rSet(drugBank)

    ## write LABEL.NA, TYPE.NA
    if outputAttributes:
        with open("TYPE.NA", "w") as typef:
            with open("LABEL.NA", "w") as labelf:
                typef.write("TYPE (class=java.lang.String)\n")
                labelf.write("LABEL (class=java.lang.String)\n")
                for node in gNodes.keys():
                    typef.write("%s = %s\n" % (node, gNodes[node]))
                    # only proteins get a visible label
                    if gNodes[node] == "protein":
                        labelf.write("%s = %s\n" % (node, node))
                    else:
                        labelf.write("%s = %s\n" % (node, ""))
                #drugs here

    ## read scores
    # uData keeps the raw values, sData the absolute values ("NA" when a
    # value cannot be parsed as a float); both are indexed
    # [file index][phenotype][node].
    uData = dict()
    sData = dict()
    for i in range(nFiles):
        uData[i] = mData.rCRSData(files[i])
        sData[i] = dict()
        for j in uData[i].keys():
            sData[i][j] = dict()
            for k in uData[i][j].keys():
                try:
                    sData[i][j][k] = abs(float(uData[i][j][k]))
                except ValueError:
                    sData[i][j][k] = "NA"

    ## iterate phenotypes
    for p in sData[0].keys():
        if len(phenotypes) > 0 and p not in phenotypes:
            continue
        pNodes = dict()
        pInteractions = dict()
        mainScores = sData[0][p]

        ## write SCORE.NA
        if outputAttributes:
            with open(p+"_SCORE.NA", "w") as scoref:
                scoref.write("SCORE (class=java.lang.Float)\n")
                for node in gNodes.keys():
                    # missing and "NA" scores are both written as 0
                    if node in uData[0][p] and uData[0][p][node] != "NA":
                        scoref.write("%s = %s\n" % (node, uData[0][p][node]))
                    else:
                        scoref.write("%s = %s\n" % (node, "0"))

        ## compute thresholds
        pStats = []
        if statLine is None:
            for i in range(nFiles):
                pStats.append(mCalculate.mean_std(sData[i][p].values()))
        else:
            for pair in re.split(",", statLine):
                (v1, v2) = re.split(";", pair)
                pStats.append((float(v1), float(v2)))
        log("%s\t%s;%s" % (p, pStats[0][0], pStats[0][1]))
        for i in range(1, len(pStats)):
            log(",%s;%s" % (pStats[i][0], pStats[i][1]))
        log("\n")

        ## iterate links
        for a in gInteractions.keys():
            if a not in mainScores or mainScores[a] == "NA":
                continue
            for b in gInteractions[a].keys():
                if b not in mainScores or mainScores[b] == "NA":
                    continue

                ## score nodes by threshold
                # NOTE(review): missing / "NA" values are only screened in
                # file 0; the other files are assumed to hold a usable score
                # for both endpoints -- confirm against the input data.
                aScore = []
                bScore = []
                for i in range(nFiles):
                    aScore.append(_thresholdLevel(sData[i][p][a], pStats[i][0],
                                                  pStats[i][1], filterBounds))
                    bScore.append(_thresholdLevel(sData[i][p][b], pStats[i][0],
                                                  pStats[i][1], filterBounds))

                ## selection rule
                # A link qualifies when its endpoint levels sum to >= 3,
                # i.e. one endpoint is above the upper bound and the other at
                # least above the lower bound.
                if includeType == "OR":
                    keep = max(aScore)+max(bScore) >= 3
                elif includeType == "AND":
                    # every file must support the link (the vote counter
                    # previously added 0, so this rule never selected anything)
                    keep = all(x+y >= 3 for (x, y) in zip(aScore, bScore))
                elif includeType == "MAIN":
                    keep = aScore[0]+bScore[0] >= 3
                else:
                    keep = False
                if keep:
                    (pNodes, pInteractions) = addLink(a, b, pNodes, pInteractions, gNodes, gInteractions)

        ## connect top scoring disconnected nodes
        # "NA" entries are dropped before sorting so the ordering never
        # depends on comparing strings with floats, and an empty candidate
        # list is handled without indexing into it.
        sortedTop = [node for node in mainScores.keys()
                     if node in gNodes
                     and gNodes[node] == "protein"
                     and mainScores[node] != "NA"]
        sortedTop.sort(key = lambda node: mainScores[node], reverse = True)
        lowerCut = pStats[0][0]+filterBounds[0]*pStats[0][1]
        for i in range(min(topDisconnected, len(sortedTop))):
            node = sortedTop[i]
            if mainScores[node] < lowerCut:
                break
            if node not in pNodes:
                pNodes[node] = gNodes[node]
                pInteractions[node] = dict()
                pInteractions[node]["__DISCONNECTED__"] = "-disconnected-"

        ## output
        if outDir is None:
            wrtDir = p
        else:
            wrtDir = outDir
        if not os.path.exists(wrtDir):
            # no shell involved, so names with spaces or metacharacters work
            os.makedirs(wrtDir)

        ## output for pathway-predictor
        if outputPARADIGM:
            protSet = set()
            for node in gNodes:
                if gNodes[node] == "protein":
                    protSet.add(node)
            netNodes = mPathway.sortConnected(pNodes, pInteractions, mPathway.revInteractions(pInteractions))
            trainNodes = []
            for component in netNodes:
                if len(protSet & set(component)) > featureReq:
                    trainNodes += component
            if len(trainNodes) == 0:
                log("ERROR: no nets contained enough data\n...trying again\n")
                # Relax the thresholds: shrink the upper bound while it stays
                # at least 0.1 above the lower one, otherwise lower both.
                if filterBounds[0]+0.1 <= filterBounds[1]:
                    filterBounds[1] -= 0.1
                else:
                    filterBounds[0] -= 0.1
                    filterBounds[1] -= 0.1
                # Re-run everything with the relaxed bounds, then stop: the
                # recursive call has already produced the output.
                filterNet(files, phenotypes = phenotypes, statLine = statLine, outDir = outDir)
                sys.exit(0)
            (lNodes, lInteractions) = mPathway.constructInteractions(trainNodes, pNodes, pInteractions)
            if outputAttributes:
                mPathway.wSIF("%s/%s_%s_pp.sif" % (wrtDir, p, filterString), lInteractions)

            ## connect class node
            classNode = "class"
            lInteractions[classNode] = dict()
            for node in lNodes.keys():
                if node in protSet:
                    lInteractions[classNode][node] = "-cl>"
            lNodes[classNode] = "active"
            mPathway.wPathway("%s/%s_%s_pp.tab" % (wrtDir, p, filterString), lNodes, lInteractions)

        ## output nodrug pathway
        else:
            mPathway.wSIF("%s/%s_%s_nodrug.sif" % (wrtDir, p, filterString), pInteractions)
            (cpNodes, cpInteractions) = mPathway.filterComplexesByGeneSupport(pNodes, pInteractions, mPathway.revInteractions(pInteractions), gNodes, mPathway.getComponentMap(gNodes, mPathway.revInteractions(gInteractions)))
            mPathway.wSIF("%s/%s_%s_nodrug_cleaned.sif" % (wrtDir, p, filterString), cpInteractions)
inputArguments = args[1:] global verbose for o, a in opts: if o == "-q": verbose = False ## execute inputPathways = [] for element in inputArguments: if os.path.isdir(element): for file in os.listdir(element): if file.endswith("pathway.tab"): inputPathways.append(file) elif element.endswith("pathway.tab"): inputPathways.append(element) ## append pathways outPathway = mPathway.Pathway({}, {}) for file in inputPathways: (nodes, interactions) = mPathway.rPathway(file) appendPathway = mPathway.Pathway(nodes, interactions) outPathway = mPathway.combinePathways(outPathway, appendPathway) ## write pathways mPathway.wPathway(prefix, outPathway.nodes, outPathway.interactions) if __name__ == "__main__": main(sys.argv[1:])
if len(args) != 3: log("ERROR: incorrect number of arguments", die = True) featureFile = args[0] pathwayFile = args[1] scoreFile = args[2] global verbose for o, a in opts: if o == "-q": verbose = False ## execute featureList = mData.rList(featureFile) (gNodes, gInteractions) = mPathway.rPathway(pathwayFile) scoreMap = {} scoreMap[sessionName] = mData.r2Col(scoreFile) ## find connected connectList = set() for source in featureList: if source not in gNodes: continue for target in featureList: if target not in gNodes: continue if source == target: continue paths = mPathway.shortestPath(source, target, gInteractions, maxDistance = maxDistance) if len(paths) == 0: