示例#1
0
def checkDistributions():
    """
    Print the difference in feature mean and variance between two groups of
    rows taken from EgoAlterTransmissions.mat.

    The "X" data field is split column-wise into two halves: the first half
    (X) and the second half (Z). A is the subset of Z whose label y is -1.
    The egos are also read directly from EgoData.csv and their mean/variance
    computed, but only the X-versus-A differences are printed.
    """
    matFileName = "../../data/EgoAlterTransmissions.mat"
    examplesList = ExamplesList.readFromMatFile(matFileName)

    numFeatures = examplesList.getDataFieldSize("X", 1)
    # Floor division: a slice bound must be an integer, and "/" yields a
    # float under Python 3 (TypeError when used as an index).
    numPersonFeatures = numFeatures // 2
    X = examplesList.getDataField("X")[:, 0:numPersonFeatures]
    Z = examplesList.getDataField("X")[:, numPersonFeatures:numFeatures]
    y = examplesList.getDataField("y")
    A = Z[y==-1, :]

    #Now load directly from the CSV file
    #Learn the distribution of the egos
    eCsvReader = EgoCsvReader()
    egoFileName = "../../data/EgoData.csv"
    alterFileName = "../../data/AlterData.csv"
    egoQuestionIds = eCsvReader.getEgoQuestionIds()
    alterQuestionIds = eCsvReader.getAlterQuestionIds()
    (X2, titles) = eCsvReader.readFile(egoFileName, egoQuestionIds)
    X2[:, eCsvReader.ageIndex] = eCsvReader.ageToCategories(X2[:, eCsvReader.ageIndex])

    # NOTE(review): mu2/sigma2 and mu3/sigma3 are computed but not printed
    # below; presumably they were swapped into the print statements by hand
    # while investigating -- confirm before removing.
    (mu, sigma) = Util.computeMeanVar(X)
    (mu2, sigma2) = Util.computeMeanVar(X2)
    (mu3, sigma3) = Util.computeMeanVar(Z)
    (mu4, sigma4) = Util.computeMeanVar(A)

    #Seems okay. Next check alters
    print(("Mean " + str(mu - mu4)))
    print(("Variance " + str(numpy.diag(sigma - sigma4))))

    # Analysis between the Egos in EgoData.csv and those in
    # EgoAlterTransmissions.mat reveals that the distributions match closely.
    # The main differences are in the means and variances in Q44A - D, but
    # this isn't too surprising.

    """
示例#2
0
# Train a linear SVM on the ego/alter examples and print the largest-magnitude
# weights as rows of a LaTeX table: centre value & question id & weight.
sampleSize = 86755
numRankedItems = 20

examplesFileName = SvmInfoExperiment.getExamplesFileName()
svmEgoSimulator = SvmEgoSimulator(examplesFileName)
preprocessor = svmEgoSimulator.getPreProcessor()
centerValues = preprocessor.getCentreVector()

svmParamsFileName = SvmInfoExperiment.getSvmParamsFileName() + "Linear.mat"
logging.info("Using SVM params from file " + svmParamsFileName)

C, kernel, kernelParamVal, errorCost = SvmInfoExperiment.loadSvmParams(svmParamsFileName)
svmEgoSimulator.trainClassifier(C, kernel, kernelParamVal, errorCost, sampleSize)
weights, b = svmEgoSimulator.getWeights()

numpy.set_printoptions(precision=3)

# Rank the weights by absolute value, largest first. The ascending order is
# computed once and flipped for both the indices and the values.
ascendingOrder = numpy.argsort(abs(weights))
sortedWeightsInds = numpy.flipud(ascendingOrder)
sortedWeights = numpy.flipud(weights[ascendingOrder])

# Ego question ids followed by alter question ids, in one list that is
# indexed below with the same indices as the weights.
egoCsvReader = EgoCsvReader()
questionIds = egoCsvReader.getEgoQuestionIds()
questionIds.extend(egoCsvReader.getAlterQuestionIds())

print(weights)

for rank in range(numRankedItems):
    featureInd = sortedWeightsInds[rank]
    rowCells = [
        str(centerValues[featureInd]),
        questionIds[featureInd][0],
        "%.3f" % sortedWeights[rank],
    ]
    # Each row ends with a LaTeX line break (two backslashes).
    print(" & ".join(rowCells) + "\\\\")
print(b)
示例#3
0
    def testReadFiles(self):
        """
        Check the examples produced by EgoCsvReader.readFiles on
        EgoData3.csv / AlterData10.csv for p = 0.5 and for p = 0.

        Each expected row of X is two rows of the ego array placed side by
        side (first half of the features, then second half), and every
        expected label in y is -1.
        """
        p = 0.5
        eCsvReader = EgoCsvReader()
        eCsvReader.setP(p)

        dataDir = PathDefaults.getDataDir() + "infoDiffusion/"
        egoFileName = dataDir + "EgoData3.csv"
        alterFileName = dataDir + "AlterData10.csv"
        examplesList, egoIndicesR, alterIndices, egoIndicesNR, alterIndicesNR  = eCsvReader.readFiles(egoFileName, alterFileName)
        #logging.debug(examplesList.getDataField("X"))

        #Read in the ego and alter arrays
        (egoArray, _) = eCsvReader.readFile(egoFileName, eCsvReader.getEgoQuestionIds())
        # The alter array is not used in the assertions below; the call is
        # kept so that the alter file is still read by this test.
        (alterArray, _) = eCsvReader.readFile(alterFileName, eCsvReader.getAlterQuestionIds())

        #Make up the correct results
        numFeatures = examplesList.getDataFieldSize("X", 1)
        # Floor division: this value is used as a slice bound, which must be
        # an integer ("/" yields a float under Python 3).
        numPersonFeatures = numFeatures // 2

        def makeExpected(egoPairs):
            """
            Build the expected (X, y) for a list of (first, second) ego row
            index pairs: row k of X is egoArray[first] followed by
            egoArray[second], and y[k, 0] is -1.
            """
            expectedX = numpy.zeros((len(egoPairs), numFeatures))
            expectedY = numpy.zeros((len(egoPairs), 1))

            for row, (firstInd, secondInd) in enumerate(egoPairs):
                expectedX[row, 0:numPersonFeatures] = egoArray[firstInd, :]
                expectedX[row, numPersonFeatures:numFeatures] = egoArray[secondInd, :]
                expectedY[row, 0] = -1

            return expectedX, expectedY

        #Note: no alters in this case
        X2, y2 = makeExpected([(0, 1), (0, 2), (1, 0), (1, 2), (2, 0), (2, 1)])

        self.assertTrue((X2 == examplesList.getDataField("X")).all())
        self.assertTrue((y2 == examplesList.getDataField("y")).all())

        #Second test
        #================
        #I modified EgoData3 so that person 2 is the same age as person 1, and
        # hence a homophile of 1. She (2) is excluded from the non-receivers, since
        #she is a homophile of person 1.

        p = 0
        eCsvReader = EgoCsvReader()
        eCsvReader.setP(p)

        examplesList, egoIndicesR, alterIndices, egoIndicesNR, alterIndicesNR  = eCsvReader.readFiles(egoFileName, alterFileName)

        # Same pairs as above except that (0, 1) is no longer expected.
        X2, y2 = makeExpected([(0, 2), (1, 0), (1, 2), (2, 0), (2, 1)])

        self.assertTrue((X2 == examplesList.getDataField("X")).all())
        self.assertTrue((y2 == examplesList.getDataField("y")).all())
示例#4
0
    def testInit(self):
        """
        Check that a freshly constructed EgoCsvReader exposes 62 ego question
        ids and 62 alter question ids.
        """
        eCsv = EgoCsvReader()

        # assertEqual rather than the deprecated assertEquals alias, which
        # was removed from unittest in Python 3.12.
        self.assertEqual(len(eCsv.getEgoQuestionIds()), 62)
        self.assertEqual(len(eCsv.getAlterQuestionIds()), 62)