def testCreateIndicatorLabels(self):
    """
    Test MetabolomicsUtils.createIndicatorLabel on the three hormone
    concentration vectors returned by loadData.

    First, for the non-NaN concentrations of each hormone, check that every
    row of the indicator matrix sums to one. Then compare the generated
    indicators with the "Ind.*" columns stored in data.RMN.total.6.txt, on
    the rows where neither the concentration nor the stored indicators are NaN.
    """
    metaUtils = MetabolomicsUtils()
    # loadData is unpacked into 8 values everywhere else in this file. The
    # data is loaded once and the NaN-filtered arrays are kept separate, so
    # the unfiltered concentrations stay available for the file comparison.
    X, XStd, X2, (XoplsCortisol, XoplsTesto, XoplsIgf1), YCortisol, YTesto, YIgf1, ages = metaUtils.loadData()

    # (boundsDict key, concentrations, indicator columns in the data file,
    #  whether the comparison against the file is enabled)
    hormones = [
        ("Cortisol", YCortisol, ["Ind.Cortisol.1", "Ind.Cortisol.2", "Ind.Cortisol.3"], False),
        ("Testosterone", YTesto, ["Ind.Testo.1", "Ind.Testo.2", "Ind.Testo.3"], True),
        ("IGF1", YIgf1, ["Ind.IGF1.1", "Ind.IGF1.2", "Ind.IGF1.3"], False),
    ]

    # Each row of the indicator matrix must sum to one
    for boundsKey, Y, columns, compareToFile in hormones:
        YNonNan = Y[numpy.logical_not(numpy.isnan(Y))]
        indicators = metaUtils.createIndicatorLabel(YNonNan, metaUtils.boundsDict[boundsKey])
        s = numpy.sum(indicators, 1)
        nptst.assert_array_equal(s, numpy.ones(s.shape[0]))

    #Now compare to those labels in the file
    dataDir = PathDefaults.getDataDir() + "metabolomic/"
    fileName = dataDir + "data.RMN.total.6.txt"
    data = pandas.read_csv(fileName, delimiter=",")

    for boundsKey, Y, columns, compareToFile in hormones:
        indicators = metaUtils.createIndicatorLabel(Y, metaUtils.boundsDict[boundsKey])
        fileIndicators = numpy.array(data[columns])

        # The Cortisol and IGF1 comparisons were already disabled (assertion
        # commented out) in the original test; only Testosterone is compared.
        if not compareToFile:
            continue

        for i in range(indicators.shape[0]):
            # Only rows where both the concentration and the stored labels are known
            if not numpy.isnan(Y[i]) and not numpy.isnan(fileIndicators[i, :]).any():
                nptst.assert_almost_equal(fileIndicators[i, :], indicators[i, :])
def saveResults(self):
    """
    Compute the results and save them for a particular hormone. Does so for all
    learners.

    For every hormone in self.hormoneDict, every column of its indicator
    matrix and every data set in self.dataDict, the examples with a non-NaN
    hormone concentration are selected, the ages are appended as an extra
    column and the features are standardised. Each learner whose self.run*
    flag is set is then passed to self.saveResult with the output file
    resultsDir + <learner>-<hormone>-<indicator index>-<data name>.npy.
    For the two L1 SVM learners self.saveWeightVectorResults is also called,
    with a "Weights"-prefixed file name.
    """
    metaUtils = MetabolomicsUtils()
    logging.debug("Running on hormones: " + str(self.hormoneDict.keys()))

    for hormoneName, hormoneConc in self.hormoneDict.items():
        nonNaInds = numpy.logical_not(numpy.isnan(hormoneConc))
        hormoneIndicators = metaUtils.createIndicatorLabel(hormoneConc, metaUtils.boundsDict[hormoneName])

        for i in range(hormoneIndicators.shape[1]):
            #Make labels -1/+1
            # numpy.int was only an alias of the builtin int and was removed
            # in NumPy 1.24, so the builtin is used directly.
            Y = numpy.array(hormoneIndicators[nonNaInds, i], int)*2-1

            for dataName, dataFeatures in self.dataDict.items():
                X = dataFeatures[nonNaInds, :]
                X = numpy.c_[X, self.ages[nonNaInds]]
                X = Standardiser().standardiseArray(X)

                # Common tail of every output file name for this combination
                suffix = "-" + hormoneName + "-" + str(i) + "-" + dataName + ".npy"

                if self.runCartTreeRank:
                    fileName = self.resultsDir + "CartTreeRank" + suffix
                    self.saveResult(X, Y, self.cartTreeRank, self.cartTreeRankParams, fileName)

                if self.runRbfSvmTreeRank:
                    fileName = self.resultsDir + "RbfSvmTreeRank" + suffix
                    self.saveResult(X, Y, self.rbfSvmTreeRank, self.rbfSvmTreeRankParams, fileName)

                if self.runL1SvmTreeRank:
                    fileName = self.resultsDir + "L1SvmTreeRank" + suffix
                    self.saveResult(X, Y, self.l1SvmTreeRank, self.l1SvmTreeRankParams, fileName)

                    #For this SVM save the weight vector
                    weightsFileName = self.resultsDir + "WeightsL1SvmTreeRank" + suffix
                    self.saveWeightVectorResults(X, Y, self.l1SvmTreeRank, self.l1SvmTreeRankParams, weightsFileName)

                if self.runCartTreeRankForest:
                    fileName = self.resultsDir + "CartTreeRankForest" + suffix
                    self.saveResult(X, Y, self.cartTreeRankForest, self.cartTreeRankForestParams, fileName)

                if self.runRbfSvmTreeRankForest:
                    fileName = self.resultsDir + "RbfSvmTreeRankForest" + suffix
                    self.saveResult(X, Y, self.rbfSvmTreeRankForest, self.rbfSvmTreeRankForestParams, fileName)

                if self.runL1SvmTreeRankForest:
                    fileName = self.resultsDir + "L1SvmTreeRankForest" + suffix
                    self.saveResult(X, Y, self.l1SvmTreeRankForest, self.l1SvmTreeRankForestParams, fileName)

                    #For this SVM save the weight vector
                    weightsFileName = self.resultsDir + "WeightsL1SvmTreeRankForest" + suffix
                    self.saveWeightVectorResults(X, Y, self.l1SvmTreeRankForest, self.l1SvmTreeRankForestParams, weightsFileName)

                if self.runRankBoost:
                    fileName = self.resultsDir + "RankBoost" + suffix
                    self.saveResult(X, Y, self.rankBoost, self.rankBoostParams, fileName)

                if self.runRankSVM:
                    fileName = self.resultsDir + "RankSVM" + suffix
                    self.saveResult(X, Y, self.rankSVM, self.rankSVMParams, fileName)

    logging.debug("All done. See you around!")
metaUtils = MetabolomicsUtils() X, XStd, X2, (XoplsCortisol, XoplsTesto, XoplsIgf1), YCortisol, YTesto, YIgf1, ages = metaUtils.loadData() dataDict = {} numpy.random.seed(datetime.datetime.now().microsecond) helper = MetabolomicsExpHelper(dataDict, YCortisol, YTesto, YIgf1, ages) dataNames =[] dataNames.extend(["raw", "pca", "Db4", "Db8", "Haar"]) #algorithms = ["CartTreeRank", "CartTreeRankForest", "L1SvmTreeRank", "L1SvmTreeRankForest", "RbfSvmTreeRank", "RbfSvmTreeRankForest", "RankBoost", "RankSVM"] algorithms = ["CartTreeRankForest", "L1SvmTreeRankForest", "RbfSvmTreeRankForest", "RankBoost", "RankSVM"] algorithmsAbbr = ["CART-TRF", "L1-TRF", "RBF-TRF", "RB", "RSVM"] hormoneNameIndicators = [] for i, (hormoneName, hormoneConc) in enumerate(helper.hormoneDict.items()): hormoneIndicators = metaUtils.createIndicatorLabel(hormoneConc, metaUtils.boundsDict[hormoneName]) for j in range(hormoneIndicators.shape[1]): hormoneNameIndicators.append(hormoneName + "-" +str(j)) numIndicators = 3 testAucsMean = numpy.zeros((len(hormoneNameIndicators), len(dataNames), len(algorithms))) testAucsStd = numpy.zeros((len(hormoneNameIndicators), len(dataNames), len(algorithms))) numMissingFiles = 0 for i, hormoneNameIndicator in enumerate(hormoneNameIndicators): for j, dataName in enumerate(dataNames): for k, alg in enumerate(algorithms): fileName = resultsDir + alg + "-" + hormoneNameIndicator + "-" + dataName + ".npy" try: