Пример #1
0
def main():
    """Score every contiguous window of IEX fractions and write a score table.

    Command line arguments (``sys.argv[1:]``), in this order:
        elutionF -- elution file handed to ``calcS.loadData``
        refF     -- reference file handed to ``calcS.loadData``
        outD     -- output prefix; the table goes to ``outD + ".iex.dat"``

    For each window of fractions ``removeLeft .. removeRight - 1`` (1-based,
    inclusive) the elution data is restricted to that window, Euclidean
    co-elution scores are calculated against the reference, a
    ``calcS.RandomForest`` is built from them and element 1 of its
    ``getValScores()`` result is stored at
    ``out[removeLeft - 1][numFracs + 1 - removeRight]``.  Cells that are never
    visited keep the initial value -1.

    Raises:
        ValueError: if the number of command line arguments is not three.
    """
    (elutionF, refF, outD) = sys.argv[1:]
    # The score calculator returned by loadData is not needed: a fresh one is
    # built for every window below.
    reference, elutionData, _ = calcS.loadData(refF, elutionF)
    numFracs = 48

    out = np.full((numFracs, numFracs), -1.0)
    for removeLeft in range(1, numFracs + 1):
        # removeRight walks from one past the last fraction down to
        # removeLeft + 1, so the window shrinks from the right by one fraction
        # per step and always holds at least one fraction.
        for removeRight in range(numFracs + 1, removeLeft, -1):
            tmpFracs = list(range(removeLeft, removeRight))
            print(tmpFracs)
            fractions = getIEXFracs(tmpFracs)
            # NOTE(review): copy.copy is a shallow copy; if getSubset modifies
            # state shared with elutionData, later windows would start from
            # already-reduced data -- confirm against calcS.
            tmpElution = copy.copy(elutionData)
            tmpElution.getSubset(fractions)
            scoreCalc = calcS.CalculateCoElutionScores(tmpElution)
            scoreCalc.calculateAllScores([calcS.Euclidiean()], reference)
            data, targets = scoreCalc.toSklearnData()
            clf = calcS.RandomForest(data, targets)
            scores = clf.getValScores()
            row = removeLeft - 1
            col = numFracs + 1 - removeRight
            out[row, col] = scores[1]
            print("%i\t%i\t%.2f" % (row, col, scores[1]))

    # The context manager closes the file even if formatting a row fails.
    with open(outD + ".iex.dat", "w") as outFH:
        outFH.write("\t" + "\t".join(map(str, range(numFracs))) + "\n")
        for i in range(numFracs):
            cells = "\t".join("{0:.2f}".format(value) for value in out[i])
            outFH.write("%i\t%s\n" % (i, cells))
def main():
    """Train on scored pairs, export 2D scores as ARFF and write predictions.

    Command line arguments (``sys.argv[1:]``), in this order:
        scoreF    -- score file handed to ``calcS.loadScoreData``
        refF      -- reference file (used by ``loadScoreData`` and ``loadData``)
        elutionF  -- elution file handed to ``calcS.loadData``
        geneNameF -- tab-separated file with three columns per line:
                     identifier, mapped identifier, species
        outF      -- output path for predictions; ``outF + ".arff"`` receives
                     the ARFF dump of the 2D scores

    Steps:
        1. Read the gene name / species mapping.
        2. Train a model on the labelled part of the score data and print its
           ``getValScores()`` result.
        3. Calculate 2D scores for the elution data, write them as ARFF, train
           a second model on them and print its ``getValScores()`` result.
        4. Predict the unlabelled pairs with the first model and write every
           pair whose prediction entry at index 1 exceeds 0.5 (presumably the
           positive-class probability -- confirm against calcS) as
           ``protA, protB, geneA, geneB, species, score``.

    Proteins without an entry in the mapping file get an empty gene name and
    species column instead of aborting the run or inheriting the values of the
    previously written pair.

    Raises:
        ValueError: if the number of command line arguments is not five, or a
            non-empty mapping line does not have exactly three columns.
    """
    (scoreF, refF, elutionF, geneNameF, outF) = sys.argv[1:]

    geneName = {}
    species = {}
    with open(geneNameF) as geneNameFH:
        for line in geneNameFH:
            line = line.rstrip()
            if not line:
                # Tolerate blank lines (e.g. a trailing newline at end of file).
                continue
            ida, idb, spec = line.split("\t")
            geneName.setdefault(ida, set()).add(idb)
            species[ida] = spec
            species[idb] = spec

    toLearn, toPred = calcS.loadScoreData(scoreF, refF)

    rfc = calcS.trainML(toLearn)
    print(rfc.getValScores())

    ref, _, calc = calcS.loadData(refF, elutionF)
    calc.calculate2DScores(ref)
    with open(outF + ".arff", "w") as outFH:
        outFH.write(calc.toArffData())
    print("Calculated scores")

    rfc2 = calcS.trainML(calc)
    print(rfc2.getValScores())

    data, _ = toPred.toSklearnData()
    preds = rfc.predict(data)
    prots = [(protA, protB) for protA, protB, label in toPred.scores]

    with open(outF, "w") as outFH:
        for (protA, protB), pred in zip(prots, preds):
            score = pred[1]
            if score > 0.5:
                # Look the names up per pair with an explicit fallback so a
                # missing mapping can neither raise nor leak a stale value
                # from an earlier iteration.
                geneA = ",".join(geneName.get(protA, ()))
                geneB = ",".join(geneName.get(protB, ()))
                spec = species.get(protA, "")
                outFH.write("%s\t%s\t%s\t%s\t%s\t%f\n"
                            % (protA, protB, geneA, geneB, spec, score))
Пример #3
0
def main():
    """Collect entropy-vs-precision rows per fraction evaluation and print tables.

    Command line arguments (``sys.argv[1:]``), in this order:
        elutionF   -- elution file; the experiment name is the text between
                      the first "Ce_" and the following "."
        refF       -- reference file handed to ``calcS.loadData``
        windowSize -- window size, converted with ``int``
        outF       -- prefix of the three output table file names

    Every result yielded by ``getFracEvals`` fills the next of three text
    buffers with one line per entry returned by ``entropyVSprecision``; each
    line is prefixed with the experiment name and the window size.  The three
    tables are only written when the first buffer is non-empty.
    """
    elutionF, refF, windowSize, outF = sys.argv[1:]
    windowSize = int(windowSize)
    outData = ["", "", ""]
    reference, elutionData, scoreCalc = calcS.loadData(refF, elutionF)
    name = elutionF.split("Ce_")[1].split(".")[0]

    for slot, resultScore in enumerate(getFracEvals(elutionData.elutionMat)):
        rows = entropyVSprecision(elutionData, reference, resultScore, windowSize)
        for row in rows:
            outData[slot] += "\n%s\t%i\t%s" % (name, windowSize, row)

    # Nothing collected for the first table: skip writing all three.
    if not outData[0]:
        return

    entropyFile = "%s_%s_Entropy_%i.dat" % (outF, name, windowSize)
    protProbFile = "%s_%s_Prot-prob_%i.dat" % (outF, name, windowSize)
    numProtsFile = "%s_%s_Num-prots_%i.dat" % (outF, name, windowSize)
    printTable(entropyFile, "Entropy", outData[0])
    printTable(protProbFile, "Prot-prob", outData[1])
    printTable(numProtsFile, "Num-prots", outData[2])
Пример #4
0
def main():
    """Run the fraction-removal evaluation per elution file and tabulate scores.

    Command line arguments (``sys.argv[1:]``), in this order:
        elutionFiles -- text file listing one elution file path per line
        refF         -- reference file handed to ``calcS.loadData``
        direction    -- passed through unchanged to ``removeFracs``
        outF         -- path of the tab-separated output table

    The output has a header ``Experiment_name, Fraction_1 .. Fraction_N`` where
    N is the longest score list, followed by one row per experiment.  Shorter
    rows are padded with "NA" so every row has N score columns.

    Raises:
        ValueError: if the number of command line arguments is not four.
        IndexError: if a listed path contains "Ce_" fewer than two times.
    """
    (elutionFiles, refF, direction, outF) = sys.argv[1:]
    outData = {}
    maxSize = 0
    with open(elutionFiles) as elutionFilesFH:
        for line in elutionFilesFH:
            line = line.rstrip()
            if not line:
                # A blank line would otherwise be handed to loadData as a path.
                continue
            reference, elutionData, scoreCalc = calcS.loadData(refF, line)
            scores = removeFracs(elutionData, reference, scoreCalc, direction)
            # NOTE(review): the name is taken after the *second* "Ce_" in the
            # path -- confirm this matches the expected file layout.
            name = line.split("Ce_")[2].split(".")[0]
            outData[name] = scores
            maxSize = max(len(scores), maxSize)

    with open(outF, "w") as outFH:
        header = ["Experiment_name"]
        header.extend("Fraction_%i" % i for i in range(1, maxSize + 1))
        outFH.write("\t".join(header) + "\n")
        for dataset, scores in outData.items():
            # Build the row as a list of cells and join once, so an empty
            # score list cannot produce a stray empty column before the
            # "NA" padding.
            cells = [str(score) for score in scores]
            cells.extend(["NA"] * (maxSize - len(cells)))
            outFH.write("\t".join([dataset] + cells) + "\n")