def main():
    """Score every contiguous window of IEX fractions and write a score grid.

    Command line: ``<elutionF> <refF> <outD>``.

    For each window of fractions ``removeLeft .. removeRight - 1`` (taken
    from fractions 1-48) the elution data is restricted to that window,
    Euclidean co-elution scores are computed against the reference, a
    ``calcS.RandomForest`` is built on them, and the second entry of
    ``getValScores()`` is recorded.

    The grid is written to ``<outD>.iex.dat`` as a tab-separated table.  The
    row index is the number of fractions dropped on the left and the column
    index is the number dropped on the right; cells for combinations that
    are never evaluated keep the initial value -1.00.
    """
    (elutionF, refF, outD) = sys.argv[1:]
    # The score calculator returned by loadData is not needed: a fresh one is
    # built for every window below.
    reference, elutionData, _ = calcS.loadData(refF, elutionF)

    numFracs = 48
    out = np.array([[-1.00] * numFracs] * numFracs)

    for removeLeft in range(1, numFracs + 1):
        # removeRight runs from numFracs + 1 (nothing dropped on the right)
        # down to removeLeft + 1 (a single fraction left in the window).
        for removeRight in reversed(range(removeLeft + 1, numFracs + 2)):
            # Same window the original reached by repeated list.remove()
            # calls; built directly so it does not rely on range() returning
            # a mutable list.
            tmpFracs = list(range(removeLeft, removeRight))
            print(tmpFracs)

            fractions = getIEXFracs(tmpFracs)
            # NOTE(review): copy.copy is shallow -- this assumes getSubset
            # does not modify state shared with elutionData; confirm.
            tmpElution = copy.copy(elutionData)
            tmpElution.getSubset(fractions)

            scoreCalc = calcS.CalculateCoElutionScores(tmpElution)
            scoreCalc.calculateAllScores([calcS.Euclidiean()], reference)
            data, targets = scoreCalc.toSklearnData()
            clf = calcS.RandomForest(data, targets)
            scores = clf.getValScores()

            row = removeLeft - 1
            col = numFracs + 1 - removeRight
            out[row][col] = scores[1]
            print("%i\t%i\t%.2f" % (row, col, scores[1]))

    # 'with' guarantees the table file is closed even if formatting fails.
    with open(outD + ".iex.dat", "w") as outFH:
        outFH.write("\t" + "\t".join(map(str, range(numFracs))) + "\n")
        for i, rowScores in enumerate(out):
            cells = "\t".join(map("{0:.2f}".format, rowScores))
            outFH.write("%i\t%s\n" % (i, cells))
def main():
    """Train on precomputed scores, predict interactions and annotate them.

    Command line: ``<scoreF> <refF> <elutionF> <geneNameF> <outF>``.

    Steps:
      1. Read ``geneNameF`` (three tab-separated columns: id A, id B,
         species) into an id -> gene-name-set map and an id -> species map.
      2. Train a model on the labelled part of ``scoreF`` and print its
         validation scores.
      3. Compute 2D scores from the elution data, dump them to
         ``<outF>.arff``, train a second model on them and print its
         validation scores.
      4. Predict the unlabelled pairs with the first model and write every
         pair whose second prediction column exceeds 0.5 to ``outF`` as
         ``protA, protB, geneA, geneB, species, score`` (tab-separated).

    Proteins without an entry in the gene-name file are written with the
    placeholder ``NA`` instead of reusing the previous row's annotation
    (or failing with NameError/KeyError) as the earlier version did.
    """
    (scoreF, refF, elutionF, geneNameF, outF) = sys.argv[1:]
    missing = "NA"

    geneName = {}
    species = {}
    with open(geneNameF) as geneNameFH:
        for line in geneNameFH:
            line = line.rstrip()
            if not line:
                # Tolerate blank/trailing empty lines in the mapping file.
                continue
            ida, idb, spec = line.split("\t")
            geneName.setdefault(ida, set()).add(idb)
            species[ida] = spec
            species[idb] = spec

    toLearn, toPred = calcS.loadScoreData(scoreF, refF)
    rfc = calcS.trainML(toLearn)
    print(rfc.getValScores())

    ref, eluD, calc = calcS.loadData(refF, elutionF)
    calc.calculate2DScores(ref)
    with open(outF + ".arff", "w") as arffFH:
        arffFH.write(calc.toArffData())
    print("Calculated scores")

    rfc2 = calcS.trainML(calc)
    print(rfc2.getValScores())

    data, targets = toPred.toSklearnData()
    # Result unused; call kept in case the conversion has side effects
    # -- TODO confirm and drop.
    toLearn.toSklearnData()
    preds = rfc.predict(data)

    prots = [(protA, protB) for protA, protB, label in toPred.scores]

    with open(outF, "w") as outFH:
        for i in range(len(preds)):
            # presumably column 1 is the positive-class probability; verify
            # against calcS's predict implementation.
            score = preds[i][1]
            if score > 0.5:
                protA, protB = prots[i]
                # Resolve annotations per row with an explicit fallback so a
                # missing protein never inherits the previous row's values.
                if protA in geneName:
                    geneA = ",".join(geneName[protA])
                else:
                    geneA = missing
                if protB in geneName:
                    geneB = ",".join(geneName[protB])
                else:
                    geneB = missing
                spec = species.get(protA, missing)
                outFH.write("%s\t%s\t%s\t%s\t%s\t%f\n"
                            % (protA, protB, geneA, geneB, spec, score))
def main():
    """Write entropy / prot-prob / num-prots tables for one elution file.

    Command line: ``<elutionF> <refF> <windowSize> <outF>``.

    Each result produced by ``getFracEvals`` is run through
    ``entropyVSprecision``; the returned lines are prefixed with the
    experiment name and window size and collected into one text block per
    result.  When the first block is non-empty, the three blocks are handed
    to ``printTable`` under the titles Entropy, Prot-prob and Num-prots.
    """
    elutionF, refF, windowSize, outF = sys.argv[1:]
    windowSize = int(windowSize)

    reference, elutionData, _unusedCalc = calcS.loadData(refF, elutionF)

    # Experiment name: text between the first "Ce_" and the next "." in the
    # elution file path.
    name = elutionF.split("Ce_")[1].split(".")[0]

    # One accumulated text block per evaluation, in the order yielded.
    outData = ['', '', '']
    evaluations = getFracEvals(elutionData.elutionMat)
    for slot, resultScore in enumerate(evaluations):
        data_lines = entropyVSprecision(elutionData, reference, resultScore, windowSize)
        for entry in data_lines:
            outData[slot] += "\n%s\t%i\t%s" % (name, windowSize, entry)

    if outData[0]:
        tables = (
            ("%s_%s_Entropy_%i.dat", "Entropy"),
            ("%s_%s_Prot-prob_%i.dat", "Prot-prob"),
            ("%s_%s_Num-prots_%i.dat", "Num-prots"),
        )
        for (pattern, title), body in zip(tables, outData):
            printTable(pattern % (outF, name, windowSize), title, body)
def main():
    """Run fraction removal on several experiments and tabulate the scores.

    Command line: ``<elutionFiles> <refF> <direction> <outF>``.

    ``elutionFiles`` is a text file listing one elution-data path per line.
    Each path is loaded together with the reference, ``removeFracs`` is run
    in the given ``direction``, and the resulting score list is stored under
    the experiment name parsed from the path.  ``outF`` receives one
    tab-separated row per experiment, right-padded with ``NA`` so every row
    has as many score columns as the longest score list.

    Blank lines in the list file are skipped, and rows/headers are assembled
    cell-by-cell so an empty score list no longer yields a stray empty
    column.
    """
    (elutionFiles, refF, direction, outF) = sys.argv[1:]

    outData = {}
    maxSize = 0
    with open(elutionFiles) as elutionFilesFH:
        for line in elutionFilesFH:
            line = line.rstrip()
            if not line:
                # A blank (e.g. trailing) line is not a path to load.
                continue
            reference, elutionData, scoreCalc = calcS.loadData(refF, line)
            scores = removeFracs(elutionData, reference, scoreCalc, direction)
            # Experiment name: text after the second "Ce_" up to the next ".".
            # NOTE(review): this requires "Ce_" to occur twice in the path
            # (the entropy script in this file uses index [1]) -- confirm
            # that the listed paths really have that layout.
            name = line.split("Ce_")[2].split(".")[0]
            outData[name] = scores
            maxSize = max(len(scores), maxSize)

    with open(outF, "w") as outFH:
        header = ["Experiment_name"]
        header.extend("Fraction_%s" % i for i in range(1, maxSize + 1))
        outFH.write("\t".join(header) + "\n")

        for dataset in outData:
            scores = outData[dataset]
            cells = [dataset]
            cells.extend(str(score) for score in scores)
            # Pad short experiments so all rows share the header's width.
            cells.extend(["NA"] * (maxSize - len(scores)))
            outFH.write("\t".join(cells) + "\n")