def testReadFiles(self):
    """Check EgoCsvReader.readFiles against examples assembled by hand.

    The ego rows are read separately with readFile and paired up into the
    rows expected in the "X" field: the first half of each row holds one
    ego and the second half holds another. Every expected "y" entry is -1.
    Per the original note, there are no alters in this case, so only ego
    rows are used to build the expected values.
    """
    dataDir = PathDefaults.getDataDir() + "infoDiffusion/"
    egoFileName = dataDir + "EgoData3.csv"
    alterFileName = dataDir + "AlterData10.csv"

    p = 0.5
    eCsvReader = EgoCsvReader()
    eCsvReader.setP(p)
    examplesList, _, _, _, _ = eCsvReader.readFiles(egoFileName, alterFileName)

    # Read in the ego and alter arrays. The alter array is still read so the
    # call is exercised, but it is not needed for the expected values.
    (egoArray, _) = eCsvReader.readFile(egoFileName, eCsvReader.getEgoQuestionIds())
    (_alterArray, _) = eCsvReader.readFile(alterFileName, eCsvReader.getAlterQuestionIds())

    numFeatures = examplesList.getDataFieldSize("X", 1)
    # Floor division: the result is used as a slice bound, which must be an
    # integer (plain "/" yields a float on Python 3).
    numPersonFeatures = numFeatures // 2

    def buildExpected(egoPairs):
        """Return (X, y) with one row per (first, second) ego index pair."""
        numTransmissions = len(egoPairs)
        X = numpy.zeros((numTransmissions, numFeatures))
        y = numpy.zeros((numTransmissions, 1))
        y[:, 0] = -1
        for row, (first, second) in enumerate(egoPairs):
            X[row, 0:numPersonFeatures] = egoArray[first, :]
            X[row, numPersonFeatures:numFeatures] = egoArray[second, :]
        return X, y

    # Make up the correct results: every ordered pair of the three egos.
    X2, y2 = buildExpected([(0, 1), (0, 2), (1, 0), (1, 2), (2, 0), (2, 1)])

    self.assertTrue((X2 == examplesList.getDataField("X")).all())
    self.assertTrue((y2 == examplesList.getDataField("y")).all())

    # Second test
    # ================
    # I modified EgoData3 so that person 2 is the same age as person 1, and
    # hence a homophile of 1. She (2) is excluded from the non-receivers,
    # since she is a homophile of person 1.
    p = 0
    eCsvReader = EgoCsvReader()
    eCsvReader.setP(p)
    examplesList, _, _, _, _ = eCsvReader.readFiles(egoFileName, alterFileName)

    # Same pairs as before, minus (0, 1).
    X2, y2 = buildExpected([(0, 2), (1, 0), (1, 2), (2, 0), (2, 1)])

    self.assertTrue((X2 == examplesList.getDataField("X")).all())
    self.assertTrue((y2 == examplesList.getDataField("y")).all())
def testGenerateNonReceivers(self):
    """Run generateNonReceivers on small hand-made ego arrays.

    Each scenario passes an ego array, the contact-count column indices,
    the homophile index pairs and the receiver counts, then compares the
    returned contacts array and ego indices with hard-coded expectations.
    """
    reader = EgoCsvReader()
    reader.setP(1)

    numContactsIndices = [0, 1]
    homophileIndexPairs = [(2, 3)]

    def verify(egos, pairs, counts, expectedContacts, expectedEgoIndices):
        """Call generateNonReceivers and compare against the expected arrays."""
        (contactsArray, egoIndices, _) = reader.generateNonReceivers(
            egos, numContactsIndices, pairs, counts)
        wantContacts = numpy.array(expectedContacts, dtype=float)
        wantEgoIndices = numpy.array(expectedEgoIndices, dtype=float)
        self.assertTrue((contactsArray == wantContacts).all())
        self.assertTrue((egoIndices == wantEgoIndices).all())

    # First test a very simple example with 1 homophile pair
    egoArray = numpy.array([
        [0, 1, 1, 5, 4],
        [0, 1, 1, 5, 8],
        [0, 0, 1, 3, 6],
    ], dtype=float)
    receiverCounts = numpy.zeros(3)

    verify(
        egoArray, homophileIndexPairs, receiverCounts,
        [
            [0, 1, 1, 5, 8],
            [0, 0, 1, 3, 6],
            [0, 1, 1, 5, 4],
            [0, 0, 1, 3, 6],
        ],
        [0, 0, 1, 1],
    )

    # Test the case when there are some receivers already
    receiverCounts = numpy.array([1, 1, 1])

    verify(
        egoArray, homophileIndexPairs, receiverCounts,
        [
            [0, 1, 1, 5, 8],
            [0, 1, 1, 5, 4],
        ],
        [0, 1],
    )

    # A more complex example
    egoArray = numpy.array([
        [1, 1, 1, 5, 4],
        [0, 0, 1, 5, 8],
        [0, 0, 1, 3, 6],
        [0, 0, 1, 5, 1],
        [0, 0, 1, 5, 2],
        [0, 0, 1, 5, 3],
    ], dtype=float)
    # This counts array is reused by all of the remaining scenarios.
    receiverCounts = numpy.zeros(6)

    verify(
        egoArray, homophileIndexPairs, receiverCounts,
        [
            [0, 0, 1, 5, 8],
            [0, 0, 1, 5, 1],
            [0, 0, 1, 5, 2],
            [0, 0, 1, 5, 3],
        ],
        [0, 0, 0, 0],
    )

    # Test picking non-homophiles (only the first ego row changes, in place)
    egoArray[0, :] = [2, 1, 2, 5, 4]

    verify(
        egoArray, homophileIndexPairs, receiverCounts,
        [
            [0, 0, 1, 5, 8],
            [0, 0, 1, 3, 6],
            [0, 0, 1, 5, 1],
            [0, 0, 1, 5, 2],
            [0, 0, 1, 5, 3],
        ],
        [0, 0, 0, 0, 0],
    )

    # Choose different p
    reader.setP(0.5)
    # Overwrite every row of the existing array in place.
    egoArray[:] = [
        [1, 1, 1, 5, 4],
        [0, 0, 1, 5, 8],
        [0, 0, 1, 3, 6],
        [0, 0, 1, 5, 8],
        [0, 0, 1, 5, 8],
        [0, 0, 1, 5, 8],
    ]

    verify(
        egoArray, homophileIndexPairs, receiverCounts,
        [
            [0, 0, 1, 5, 8],
            [0, 0, 1, 5, 8],
            [0, 0, 1, 3, 6],
        ],
        [0, 0, 0],
    )

    # Test 2 different homophile fields
    reader.setP(1)
    homophileIndexPairs = [(2, 3), (4, 5)]
    egoArray = numpy.array([
        [0, 0, 1, 5, 1, 2, 1],
        [0, 0, 1, 5, 1, 3, 2],
        [0, 0, 1, 4, 1, 2, 3],
        [1, 0, 1, 5, 1, 2, 4],
        [0, 0, 1, 2, 1, 1, 5],
        [0, 0, 1, 5, 1, 2, 6],
    ], dtype=float)

    verify(
        egoArray, homophileIndexPairs, receiverCounts,
        [
            [0, 0, 1, 5, 1, 2, 1],
            [0, 0, 1, 5, 1, 2, 6],
        ],
        [3, 3],
    )