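#Module imports (a sketch): the graph and utility classes below follow the APGL
#library layout as I understand it. The ego-network classes used later
#(EgoNetworkSimulator, EgoEdgeLabelPredictor, PrimalRidgeRegression,
#PrimalWeightedRidgeRegression, KernelRidgeRegression, LinearKernel,
#GaussianKernel, InfoExperiment) are project-specific; their import paths are
#not shown here and would need to match this project's own package layout.
import sys
import time
import logging
import unittest
import numpy

from apgl.graph.VertexList import VertexList
from apgl.graph.SparseGraph import SparseGraph
from apgl.generator.ErdosRenyiGenerator import ErdosRenyiGenerator
from apgl.util.PathDefaults import PathDefaults
from apgl.util.Evaluator import Evaluator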
def trainPredictor():
    """
    Train the model for the simulation.
    """
    paramsFile = InfoExperiment.getSvmParamsFileName()
    sampleSize = InfoExperiment.getNumSimulationExamples()

    #examplesFileName must be supplied at module scope; it is not defined here
    simulator = EgoNetworkSimulator(examplesFileName)
    params, paramFuncs = InfoExperiment.loadParams(paramsFile)

    startTime = time.time()
    simulator.trainClassifier(params, paramFuncs, sampleSize)
    endTime = time.time()

    logging.info("Time taken for training is " + str(endTime - startTime) + " seconds.")

    return simulator
def saveParams(paramsFile):
    """
    Run model selection for the SVM and save the parameters and errors.
    This only needs to be done once for the data.
    """
    graphFileName = InfoExperiment.getGraphFileName()
    graph = SparseGraph.load(graphFileName)

    logging.info("Find all ego networks")
    trees = graph.findTrees()

    subgraphSize = 10000
    subgraphIndices = []

    for i in range(len(trees)):
        subgraphIndices.extend(trees[i])
        if len(subgraphIndices) > subgraphSize:
            logging.info("Chose " + str(i) + " ego networks.")
            break

    graph = graph.subgraph(subgraphIndices)
    logging.info("Taking random subgraph of size " + str(graph.getNumVertices()))

    folds = 3
    sampleSize = graph.getNumEdges()
    lambda1s = 2.0**numpy.arange(-8, -2)
    lambda2s = 2.0**numpy.arange(-8, -2)
    sigmas = 2.0**numpy.arange(-6, 0)
    #lambda1s = [0.0625, 2.0, 10.0]
    #lambda2s = [0.0625, 5.0, 30.0]
    #sigmas = [0.0625, 1.0, 10.0]

    logging.info("lambda1s = " + str(lambda1s))
    logging.info("lambda2s = " + str(lambda2s))
    logging.info("sigmas = " + str(sigmas))

    lmbda = 0.1
    alpha = 5.0
    #Note that this alpha is not the same as the one used in the errorFunc
    kernel = LinearKernel()
    alterRegressor = PrimalWeightedRidgeRegression(lmbda, alpha)
    egoRegressor = KernelRidgeRegression(kernel, lmbda)
    predictor = EgoEdgeLabelPredictor(alterRegressor, egoRegressor)
    simulator = EgoNetworkSimulator(graph, predictor)
    errorFunc = Evaluator.weightedRootMeanSqError

    #First just use the linear kernel
    paramList = []
    paramFuncs = [egoRegressor.setLambda, alterRegressor.setLambda]

    for i in lambda1s:
        for j in lambda2s:
            paramList.append([i, j])

    params, paramFuncs, error = simulator.modelSelection(paramList, paramFuncs, folds, errorFunc, sampleSize)

    #Now try the RBF kernel
    kernel = GaussianKernel()
    egoRegressor.setKernel(kernel)
    paramFuncs2 = [egoRegressor.setLambda, alterRegressor.setLambda, kernel.setSigma]
    paramList2 = []

    for i in lambda1s:
        for j in lambda2s:
            for k in sigmas:
                paramList2.append([i, j, k])

    params2, paramFuncs2, error2 = simulator.modelSelection(paramList2, paramFuncs2, folds, errorFunc, sampleSize)

    if error2 < error:
        params = params2
        paramFuncs = paramFuncs2

    paramsFile = InfoExperiment.getParamsFileName()
    (means, vars) = simulator.evaluateClassifier(params, paramFuncs, folds, errorFunc, sampleSize)
    logging.info("Evaluated classifier with mean errors " + str(means))

    simulator.getClassifier().saveParams(params, paramFuncs, paramsFile)
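#Example driver (a sketch, not part of the module above): the intended flow
#appears to be to run model selection once via saveParams() and then train a
#predictor for the simulation with trainPredictor(). The argument below is an
#assumption based on the file names used in trainPredictor().
#
#    saveParams(InfoExperiment.getSvmParamsFileName())
#    simulator = trainPredictor()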
class EgoNetworkSimulatorTest(unittest.TestCase):
    def setUp(self):
        logging.basicConfig(stream=sys.stdout, level=logging.INFO)
        dataDir = PathDefaults.getDataDir() + "infoDiffusion/"

        numVertices = 100
        numFeatures = 5
        c = numpy.random.rand(numFeatures)

        vList = VertexList(numVertices, numFeatures)
        vList.setVertices(numpy.random.rand(numVertices, numFeatures))
        graph = SparseGraph(vList)

        p = 0.1
        generator = ErdosRenyiGenerator(p)
        graph = generator.generate(graph)

        #Now rewrite the edge labels so that they depend only on the alter
        edges = graph.getAllEdges()

        for i in range(edges.shape[0]):
            edgeLabel = numpy.dot(graph.getVertex(edges[i, 1]), c)
            graph.addEdge(edges[i, 0], edges[i, 1], edgeLabel)

        #Create the predictor
        lmbda = 0.01
        self.alterRegressor = PrimalRidgeRegression(lmbda)
        self.egoRegressor = PrimalRidgeRegression(lmbda)
        predictor = EgoEdgeLabelPredictor(self.alterRegressor, self.egoRegressor)

        self.egoNetworkSimulator = EgoNetworkSimulator(graph, predictor)
        self.graph = graph

    def testSampleExamples(self):
        numEdges = 100
        graph = self.egoNetworkSimulator.sampleEdges(numEdges)

        self.assertEquals(graph.getNumEdges(), numEdges)
        self.assertEquals(graph.getNumVertices(), self.graph.getNumVertices())
        self.assertEquals(graph.isUndirected(), self.graph.isUndirected())

    def testModelSelection(self):
        paramList = [[0.1, 0.1], [0.2, 0.1], [0.1, 0.2]]
        paramFunc = [self.egoRegressor.setLambda, self.alterRegressor.setLambda]
        folds = 3
        errorFunc = Evaluator.rootMeanSqError
        sampleSize = 100

        params, paramFuncs, errors = self.egoNetworkSimulator.modelSelection(paramList, paramFunc, folds, errorFunc, sampleSize)

        logging.debug(params)
        logging.debug(paramFuncs)
        logging.debug(errors)

    def testEvaluateClassifier(self):
        params = [0.1, 0.1]
        paramFunc = [self.egoRegressor.setLambda, self.alterRegressor.setLambda]
        folds = 3
        errorFunc = Evaluator.rootMeanSqError
        sampleSize = 100

        (means, vars) = self.egoNetworkSimulator.evaluateClassifier(params, paramFunc, folds, errorFunc, sampleSize)

        logging.debug(means)
        logging.debug(vars)

    def testTrainClassifier(self):
        params = [0.1, 0.1]
        paramFuncs = [self.egoRegressor.setLambda, self.alterRegressor.setLambda]
        sampleSize = 100

        self.egoNetworkSimulator.trainClassifier(params, paramFuncs, sampleSize)

    def testGenerateRandomGraph(self):
        egoFileName = PathDefaults.getDataDir() + "infoDiffusion/EgoData.csv"
        alterFileName = PathDefaults.getDataDir() + "infoDiffusion/AlterData.csv"
        numVertices = 1000
        infoProb = 0.1
        graphType = "SmallWorld"
        p = 0.1
        neighbours = 10

        self.egoNetworkSimulator.generateRandomGraph(egoFileName, alterFileName, numVertices, infoProb, graphType, p, neighbours)

    def testRunSimulation(self):
        egoFileName = PathDefaults.getDataDir() + "infoDiffusion/EgoData.csv"
        alterFileName = PathDefaults.getDataDir() + "infoDiffusion/AlterData.csv"
        numVertices = 1000
        infoProb = 0.1
        graphType = "SmallWorld"
        p = 0.1
        neighbours = 10

        params = [0.1, 0.1]
        paramFuncs = [self.egoRegressor.setLambda, self.alterRegressor.setLambda]
        sampleSize = 100
        maxIterations = 5

        self.egoNetworkSimulator.trainClassifier(params, paramFuncs, sampleSize)
        self.egoNetworkSimulator.generateRandomGraph(egoFileName, alterFileName, numVertices, infoProb, graphType, p, neighbours)
        self.egoNetworkSimulator.runSimulation(maxIterations)
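#Standard unittest entry point so the test class above can be run directly as a
#script; this line is an addition and was not part of the original listing.
if __name__ == "__main__":
    unittest.main()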