示例#1
0
def checkDistributions():
    matFileName = "../../data/EgoAlterTransmissions.mat"
    examplesList = ExamplesList.readFromMatFile(matFileName)

    numFeatures = examplesList.getDataFieldSize("X", 1)
    X = examplesList.getDataField("X")[:, 0:numFeatures/2]
    Z = examplesList.getDataField("X")[:, numFeatures/2:numFeatures]
    y = examplesList.getDataField("y")
    A = Z[y==-1, :]

    #Now load directly from the CSV file
    #Learn the distribution of the egos
    eCsvReader = EgoCsvReader()
    egoFileName = "../../data/EgoData.csv"
    alterFileName = "../../data/AlterData.csv"
    egoQuestionIds = eCsvReader.getEgoQuestionIds()
    alterQuestionIds = eCsvReader.getAlterQuestionIds()
    (X2, titles) = eCsvReader.readFile(egoFileName, egoQuestionIds)
    X2[:, eCsvReader.ageIndex] = eCsvReader.ageToCategories(X2[:, eCsvReader.ageIndex])

    (mu, sigma) = Util.computeMeanVar(X)
    (mu2, sigma2) = Util.computeMeanVar(X2)
    (mu3, sigma3) = Util.computeMeanVar(Z)
    (mu4, sigma4) = Util.computeMeanVar(A)

    #Seems okay. Next check alters
    print(("Mean " + str(mu - mu4)))
    print(("Variance " + str(numpy.diag(sigma - sigma4))))

    """
    Analysis between the Egos in EgoData.csv and those in EgoAlterTransmissions.mat
    reveals that the distributions match closely. The main differences are
    in the means and variances in Q44A - D, but this isn't too suprising.
    """

    """
示例#2
0
    #checkDistributions()

    """
    We will read ego and alters data, and check they have the same values.
    """

    egoFileName = "../../data/EgoData.csv"
    alterFileName = "../../data/AlterData.csv"

    eCsvReader = EgoCsvReader()
    egoQuestionIds = eCsvReader.getEgoQuestionIds()
    alterQuestionIds = eCsvReader.getAlterQuestionIds()

    missing = 0 
    (egoX, titles) = eCsvReader.readFile(egoFileName, egoQuestionIds, missing)
    egoX[:, eCsvReader.ageIndex] = eCsvReader.ageToCategories(egoX[:, eCsvReader.ageIndex])

    (alterX, titles) = eCsvReader.readFile(alterFileName, alterQuestionIds, missing)
    alterX[:, eCsvReader.ageIndex] = eCsvReader.ageToCategories(alterX[:, eCsvReader.ageIndex])

    numFeatures = egoX.shape[1]
    numEgoExamples = egoX.shape[0]
    numAlterExamples = alterX.shape[0]

    for i in range(0, numFeatures):
        (histE, uniqElementsE) = Util.histogram(egoX[:, i])
        (histA, uniqElementsA) = Util.histogram(alterX[:, i])

        print((str(i) + " " + str(egoQuestionIds[i])))
        print(("Ego   " + str(uniqElementsE)))
        print(("Alter " + str(uniqElementsA)))