# Find all of the decays in information transmission, using the test CSV
# files together with the receiver/non-receiver index arrays built earlier.
egoTestFileName = dataDir + "EgoInfoTest.csv"
alterTestFileName = dataDir + "AltersInfoTest.csv"
decayGraph = eCsvReader.findInfoDecayGraph(
    egoTestFileName,
    alterTestFileName,
    egoIndicesR,
    alterIndices,
    egoIndicesNR,
    alterIndicesNR,
    egoFileName,
    alterFileName,
    missing,
)
logging.info("Size of decay graph: %s", decayGraph.getNumVertices())
logging.info("Number of edges: %s", decayGraph.getNumEdges())

# Now write the decays to a simplegraph file
decayFileName = dataDir + "EgoAlterDecays.dat"
decayGraph.save(decayFileName)
logging.info("Wrote decays to file %s", decayFileName)

# Now write out transmissions: the full examples list plus a random
# subsample of sampleSize examples drawn without replacement.
sampleSize = 1000
indices = Util.sampleWithoutReplacement(sampleSize, examplesList.getNumExamples())
examplesListSample = examplesList.getSubExamplesList(indices)

outputFileName1 = dataDir + "EgoAlterTransmissions"
examplesList.writeToMatFile(outputFileName1)
outputFileName2 = dataDir + "EgoAlterTransmissions1000"
examplesListSample.writeToMatFile(outputFileName2)

# Let's also write out the csv file for analysis: the labels are appended
# to the examples as an extra trailing column.
X = examplesList.getDataField(examplesList.getDefaultExamplesName())
y = examplesList.getDataField(examplesList.getLabelsName())
X = numpy.c_[X, y]
def generateNonReceivers(self, egoArray, numContactsIndices, homophileIndexPairs, receiverCounts):
    """
    Generate a series of non-receivers from egoArray based on homophily
    information.

    For every ego the number of non-receiving contacts is the sum, over
    numContactsIndices, of int(2 * egoArray[i, index]), minus
    receiverCounts[i] and floored at zero. Up to round(self.p * total) of
    those contacts are sampled from the ego's homophiles (the other egos
    that agree with it on every variable for which its homophily flag
    equals 1); the remainder are sampled from all other egos.

    Inputs
    ------
    egoArray - the 2D array of all egos, one row per ego
    numContactsIndices - a list of indices of the number of various contacts
        (friends, family etc.)
    homophileIndexPairs - a list of pairs. The first is the index of the
        homophility and the second is the index of the variable.
    receiverCounts - the number of receivers for each ego

    Outputs
    -------
    contactsArray - the array of non-receivers, one row (a copy of the
        sampled ego's row, stored as float) per generated contact
    egoIndices - the corresponding 1D int32 array of ego indices
    alterIndices - the corresponding 1D array holding, for each contact,
        the row of egoArray it was copied from
    """
    (numEgos, numEgoFeatures) = (egoArray.shape[0], egoArray.shape[1])

    # Per-ego pieces are gathered in lists and concatenated once at the end;
    # growing the arrays with numpy.r_ inside the loop copies everything
    # generated so far on every iteration. The empty seed arrays fix the
    # shape and dtype of the results when no contact is generated at all.
    contactsBlocks = [numpy.zeros((0, numEgoFeatures))]
    egoIndexBlocks = [numpy.zeros(0, numpy.int32)]    # index of each ego to each contact
    alterIndexBlocks = [numpy.zeros(0, numpy.int32)]  # index of each alter to each contact

    allIndices = numpy.arange(numEgos)

    logging.info("Generating non-receivers for " + str(numEgos) + " egos")

    #Assume number of contacts above 9 is just 10 (final category)
    for i in range(numEgos):
        Util.printIteration(i, self.printIterationStep, numEgos)

        # NOTE(review): the factor 2 comes from the original code and its
        # rationale is not visible here -- confirm against the data coding.
        totalContacts = 0
        for contactIndex in numContactsIndices:
            totalContacts += int(egoArray[i, contactIndex] * 2)
        totalContacts = max(0, totalContacts - receiverCounts[i])

        # Everybody except the current person.
        otherIndices = numpy.delete(allIndices, i)

        # Get the indices of similar people. The candidate set is seeded on
        # the FIRST pair whose homophily flag is set; seeding only when the
        # flag of pair 0 is set (as before) left egos that are homophilous
        # solely on later variables with no homophiles at all.
        homophileIndices = None
        for pair in homophileIndexPairs:
            homophilyIndex, variableIndex = pair[0], pair[1]
            if egoArray[i, homophilyIndex] == 1:
                if homophileIndices is None:
                    homophileIndices = otherIndices
                subset = numpy.nonzero(egoArray[:, variableIndex] == egoArray[i, variableIndex])[0]
                homophileIndices = numpy.intersect1d(homophileIndices, subset)
        if homophileIndices is None:
            # Builtin int replaces the numpy.int alias, which no longer
            # exists in current NumPy releases.
            homophileIndices = numpy.array([], int)

        nonHomophileIndices = numpy.setdiff1d(otherIndices, homophileIndices)

        numHomophileContacts = min(int(round(self.p * totalContacts)), homophileIndices.shape[0])
        numNonHomophilesContacts = min(totalContacts - numHomophileContacts, nonHomophileIndices.shape[0])
        numContacts = numHomophileContacts + numNonHomophilesContacts

        # Sample homophiles first and non-homophiles second so the order of
        # the calls into Util.sampleWithoutReplacement is unchanged.
        perm = Util.sampleWithoutReplacement(numHomophileContacts, homophileIndices.shape[0])
        homophileSample = homophileIndices[perm]
        perm = Util.sampleWithoutReplacement(numNonHomophilesContacts, nonHomophileIndices.shape[0])
        nonHomophileSample = nonHomophileIndices[perm]

        tempContacts = numpy.zeros((numContacts, numEgoFeatures))
        tempContacts[0:numHomophileContacts, :] = egoArray[homophileSample, :]
        tempContacts[numHomophileContacts:numContacts, :] = egoArray[nonHomophileSample, :]

        contactsBlocks.append(tempContacts)
        alterIndexBlocks.append(homophileSample)
        alterIndexBlocks.append(nonHomophileSample)
        # Integer fill keeps egoIndices usable as an index array; the former
        # numpy.ones(...) * i silently promoted it to float64.
        egoIndexBlocks.append(numpy.full(numContacts, i, dtype=numpy.int32))

    contactsArray = numpy.concatenate(contactsBlocks)
    egoIndices = numpy.concatenate(egoIndexBlocks)
    alterIndices = numpy.concatenate(alterIndexBlocks)

    logging.info("Done - generated " + str(egoIndices.shape[0]) + " non-receivers")

    return contactsArray, egoIndices, alterIndices