def checkDistributions(): matFileName = "../../data/EgoAlterTransmissions.mat" examplesList = ExamplesList.readFromMatFile(matFileName) numFeatures = examplesList.getDataFieldSize("X", 1) X = examplesList.getDataField("X")[:, 0:numFeatures/2] Z = examplesList.getDataField("X")[:, numFeatures/2:numFeatures] y = examplesList.getDataField("y") A = Z[y==-1, :] #Now load directly from the CSV file #Learn the distribution of the egos eCsvReader = EgoCsvReader() egoFileName = "../../data/EgoData.csv" alterFileName = "../../data/AlterData.csv" egoQuestionIds = eCsvReader.getEgoQuestionIds() alterQuestionIds = eCsvReader.getAlterQuestionIds() (X2, titles) = eCsvReader.readFile(egoFileName, egoQuestionIds) X2[:, eCsvReader.ageIndex] = eCsvReader.ageToCategories(X2[:, eCsvReader.ageIndex]) (mu, sigma) = Util.computeMeanVar(X) (mu2, sigma2) = Util.computeMeanVar(X2) (mu3, sigma3) = Util.computeMeanVar(Z) (mu4, sigma4) = Util.computeMeanVar(A) #Seems okay. Next check alters print(("Mean " + str(mu - mu4))) print(("Variance " + str(numpy.diag(sigma - sigma4)))) """ Analysis between the Egos in EgoData.csv and those in EgoAlterTransmissions.mat reveals that the distributions match closely. The main differences are in the means and variances in Q44A - D, but this isn't too suprising. """ """
examplesFileName = SvmInfoExperiment.getExamplesFileName() sampleSize = 86755 svmEgoSimulator = SvmEgoSimulator(examplesFileName) preprocessor = svmEgoSimulator.getPreProcessor() centerValues = preprocessor.getCentreVector() svmParamsFileName = SvmInfoExperiment.getSvmParamsFileName() + "Linear.mat" logging.info("Using SVM params from file " + svmParamsFileName) C, kernel, kernelParamVal, errorCost = SvmInfoExperiment.loadSvmParams(svmParamsFileName) svmEgoSimulator.trainClassifier(C, kernel, kernelParamVal, errorCost, sampleSize) weights, b = svmEgoSimulator.getWeights() numpy.set_printoptions(precision=3) #Print the weights then their sorted values by indices and then value sortedWeightsInds = numpy.flipud(numpy.argsort(abs(weights))) sortedWeights = numpy.flipud(weights[numpy.argsort(abs(weights))]) egoCsvReader = EgoCsvReader() questionIds = egoCsvReader.getEgoQuestionIds() questionIds.extend(egoCsvReader.getAlterQuestionIds()) print(weights) numRankedItems = 20 for i in range(0,numRankedItems): print((str(centerValues[sortedWeightsInds[i]]) + " & " + questionIds[sortedWeightsInds[i]][0] + " & " + str("%.3f" % sortedWeights[i]) + "\\\\")) print(b)
def testReadFiles(self):
    """
    Check the examples produced by EgoCsvReader.readFiles for p = 0.5 and p = 0.

    Each expected row of "X" is the concatenation of two rows of the ego
    array read from EgoData3.csv, and every expected label is -1.
    """
    dataDir = PathDefaults.getDataDir() + "infoDiffusion/"
    egoFileName = dataDir + "EgoData3.csv"
    alterFileName = dataDir + "AlterData10.csv"

    p = 0.5
    eCsvReader = EgoCsvReader()
    eCsvReader.setP(p)
    examplesList, egoIndicesR, alterIndices, egoIndicesNR, alterIndicesNR = eCsvReader.readFiles(egoFileName, alterFileName)

    #Read in the ego and alter arrays
    (egoArray, _) = eCsvReader.readFile(egoFileName, eCsvReader.getEgoQuestionIds())
    # The alter array is not needed for the expected values (no alters in this
    # case); the call is kept so the alter file is still read by the test.
    eCsvReader.readFile(alterFileName, eCsvReader.getAlterQuestionIds())

    numFeatures = examplesList.getDataFieldSize("X", 1)
    # Floor division: "/" yields a float on Python 3, which cannot be used as
    # a slice bound.
    numPersonFeatures = numFeatures // 2

    def makeExpected(pairs):
        # Build the expected (X, y): row i is egoArray[first] followed by
        # egoArray[second] for the i-th (first, second) pair, labelled -1.
        expectedX = numpy.zeros((len(pairs), numFeatures))
        expectedY = -numpy.ones((len(pairs), 1))
        for row, (first, second) in enumerate(pairs):
            expectedX[row, 0:numPersonFeatures] = egoArray[first, :]
            expectedX[row, numPersonFeatures:numFeatures] = egoArray[second, :]
        return expectedX, expectedY

    #Make up the correct results
    #Note: no alters in this case
    X2, y2 = makeExpected([(0, 1), (0, 2), (1, 0), (1, 2), (2, 0), (2, 1)])

    self.assertTrue((X2 == examplesList.getDataField("X")).all())
    self.assertTrue((y2 == examplesList.getDataField("y")).all())

    #Second test
    #================
    #I modified EgoData3 so that person 2 is the same age as person 1, and
    # hence a homophile of 1. She (2) is excluded from the non-receivers, since
    #she is a homophile of person 1.
    p = 0
    eCsvReader = EgoCsvReader()
    eCsvReader.setP(p)
    examplesList, egoIndicesR, alterIndices, egoIndicesNR, alterIndicesNR = eCsvReader.readFiles(egoFileName, alterFileName)

    # Same pairs as above, minus (0, 1).
    X2, y2 = makeExpected([(0, 2), (1, 0), (1, 2), (2, 0), (2, 1)])

    self.assertTrue((X2 == examplesList.getDataField("X")).all())
    self.assertTrue((y2 == examplesList.getDataField("y")).all())
def testInit(self):
    """Check that a new EgoCsvReader reports 62 ego and 62 alter question ids."""
    eCsv = EgoCsvReader()

    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(len(eCsv.getEgoQuestionIds()), 62)
    self.assertEqual(len(eCsv.getAlterQuestionIds()), 62)