示例#1
0
    def __init__(self, examplesFileName):
        """
        Read the examples from a file, log the label distribution, and set up
        the classifier, the error measure and the preprocessor.

        The default examples field is named "X" and the labels field "y". The
        examples in the default field are standardised once here (the original
        documentation describes this as zero mean and unit variance) and the
        result is written back into the examples list.

        :param examplesFileName: name of the file handed to ExamplesList.readFromFile
            (a Matlab file according to the original documentation).
        """
        examples = ExamplesList.readFromFile(examplesFileName)
        self.examplesList = examples
        examples.setDefaultExamplesName("X")
        examples.setLabelsName("y")

        # Report how the labels are distributed and the error obtained by
        # always predicting on the basis of the least frequent label count.
        labels = examples.getSampledDataField("y").ravel()
        (freqs, items) = Util.histogram(labels)
        labelDistribution = (freqs, items)
        logging.info("Distribution of labels: " + str(labelDistribution))
        baseErrorRate = float(min(freqs))/examples.getNumExamples()
        logging.info("The base error rate is " + str(baseErrorRate))

        self.classifier = LibSVM()
        self.errorMethod = Evaluator.balancedError

        # Standardise the default examples field in place.
        self.preprocessor = Standardiser()
        fieldName = examples.getDefaultExamplesName()
        standardisedX = self.preprocessor.standardiseArray(examples.getDataField(fieldName))
        examples.overwriteDataField(fieldName, standardisedX)
示例#2
0
 def graphFromMatFile(matFileName):
     """
     Generate a sparse graph from a Matlab file of ego and alters and their transmissions. This is a mostly
     disconnected graph made up of pairs of connected vertices, i.e. each vertex has degree 1.

     Row i of the "X" field is split in half. The first half followed by a 1 is stored as
     vertex 2*i, the second half followed by a 0 is stored as vertex 2*i+1, and an edge is
     added between those two vertices.

     :param matFileName: name of the file handed to ExamplesList.readFromMatFile.
     :returns: a SparseGraph with two vertices and one edge per example.
     """
     examplesList = ExamplesList.readFromMatFile(matFileName)
     numExamples = examplesList.getNumExamples()
     numFeatures = examplesList.getDataFieldSize("X", 1)

     # Floor division keeps the split point an integer: with true division
     # (Python 3) the slice bounds below would be floats and raise TypeError.
     halfFeatures = int(numFeatures // 2)
     numVertexFeatures = halfFeatures + 1
     vList = VertexList(numExamples*2, numVertexFeatures)
     sGraph = SparseGraph(vList)

     for i in range(numExamples):
         v1Index = i*2
         v2Index = i*2+1
         example = examplesList.getSubDataField("X", numpy.array([i])).ravel()
         # NOTE(review): presumably numFeatures is even; for an odd count the second
         # half is one element longer than the first -- confirm against the data.
         vertex1 = numpy.r_[example[0:halfFeatures], numpy.array([1])]
         vertex2 = numpy.r_[example[halfFeatures:numFeatures], numpy.array([0])]

         sGraph.setVertex(v1Index, vertex1)
         sGraph.setVertex(v2Index, vertex2)
         sGraph.addEdge(v1Index, v2Index)

     return sGraph
示例#3
0
    def fullTransGraph(self):
        """
        This function will return a new graph which contains a directed edge if
        a transmission will occur between two vertices.

        Every edge in self.edges is tested in both directions: row i of the
        example matrix holds the features of (vertex1, vertex2) and row
        i+numEdges holds the features of (vertex2, vertex1). The rows are
        optionally preprocessed and then classified, and each row labelled 1
        becomes an edge in the returned graph in the corresponding direction.

        :returns: a new SparseGraph built on the vertex list of self.graph.
        :raises ValueError: if self.iteration is not 0.
        """
        if self.iteration != 0:
            raise ValueError("Must run fullTransGraph before advanceGraph")

        #First, find all the edges in the graph and create an ExampleList
        numEdges = self.edges.shape[0]
        numFeatures = self.numPersonFeatures
        X = numpy.zeros((numEdges*2, numFeatures*2))

        for i in range(numEdges):
            vertex1 = self.graph.getVertex(self.edges[i,0])
            vertex2 = self.graph.getVertex(self.edges[i,1])
            features1 = vertex1[0:numFeatures]
            features2 = vertex2[0:numFeatures]

            #Forward direction in the first half, reverse direction in the second
            X[i, :] = numpy.r_[features1, features2]
            X[i+numEdges, :] = numpy.r_[features2, features1]

        name = "X"
        examplesList = ExamplesList(X.shape[0])
        examplesList.addDataField(name, X)
        examplesList.setDefaultExamplesName(name)

        if self.preprocessor is not None:
            X = self.preprocessor.process(examplesList.getDataField(examplesList.getDefaultExamplesName()))
            examplesList.overwriteDataField(examplesList.getDefaultExamplesName(), X)

        y = self.egoPairClassifier.classify(examplesList.getSampledDataField(name))
        # NOTE(review): the False argument presumably requests a directed graph -- confirm
        # against the SparseGraph constructor.
        fullTransmissionGraph = SparseGraph(self.graph.getVertexList(), False)

        #Now, write out the transmission graph
        for index in numpy.nonzero(y==1)[0]:
            if index < numEdges:
                fullTransmissionGraph.addEdge(self.edges[index,0], self.edges[index,1])
            else:
                #Rows in the second half describe the reversed edge
                edgeIndex = index-numEdges
                fullTransmissionGraph.addEdge(self.edges[edgeIndex,1], self.edges[edgeIndex,0])

        return fullTransmissionGraph
示例#4
0
    def advanceGraph(self):
        """
        Advance the graph by one iteration of transmissions.

        Every edge in self.edges which joins a vertex whose entry at
        self.infoIndex is 1 to a vertex whose entry is 0 is a candidate. The
        candidates are removed from self.edges, their (sender, receiver)
        features are optionally standardised and then classified. For each
        candidate labelled 1 the edge is added to self.transmissionGraph and
        the receiver's entry at self.infoIndex is set to 1 in self.graph.

        The transmitted edges of this iteration are appended to
        self.allTransmissionEdges and self.iteration is incremented.

        :returns: self.graph after the update.
        """
        #First, find all the edges in the graph and create an ExampleList
        blockSize = 5000
        numFeatures = self.numPersonFeatures
        infoIndex = self.infoIndex

        X = numpy.zeros((blockSize, numFeatures*2))
        possibleTransmissionEdges = []
        possibleTransmissionEdgeIndices = []

        for i in range(self.edges.shape[0]):
            vertex1 = self.graph.getVertex(self.edges[i,0])
            vertex2 = self.graph.getVertex(self.edges[i,1])

            #The two cases are mutually exclusive, so at most one row is added per edge
            if vertex1[infoIndex] == 1 and vertex2[infoIndex] == 0:
                X[len(possibleTransmissionEdges), :] = numpy.r_[vertex1[0:numFeatures], vertex2[0:numFeatures]]
                possibleTransmissionEdges.append((self.edges[i,0], self.edges[i,1]))
                possibleTransmissionEdgeIndices.append(i)
            elif vertex2[infoIndex] == 1 and vertex1[infoIndex] == 0:
                X[len(possibleTransmissionEdges), :] = numpy.r_[vertex2[0:numFeatures], vertex1[0:numFeatures]]
                possibleTransmissionEdges.append((self.edges[i,1], self.edges[i,0]))
                possibleTransmissionEdgeIndices.append(i)

            #Grow X by another block once it is full
            if len(possibleTransmissionEdges) == X.shape[0]:
                X = numpy.r_[X, numpy.zeros((blockSize, numFeatures*2))]

        #Now, remove from edges the ones that can possibly have a transmission
        self.edges = numpy.delete(self.edges, possibleTransmissionEdgeIndices, 0)
        X = X[0:len(possibleTransmissionEdges), :]

        name = "X"
        examplesList = ExamplesList(X.shape[0])
        examplesList.addDataField(name, X)
        examplesList.setDefaultExamplesName(name)

        if self.preprocessor is not None:
            X = examplesList.getDataField(examplesList.getDefaultExamplesName())
            X = self.preprocessor.standardiseArray(X)
            examplesList.overwriteDataField(examplesList.getDefaultExamplesName(), X)

        y = self.egoPairClassifier.classify(X)

        #numpy.int was only an alias of the builtin int and has been removed from NumPy
        numTransmissions = int(numpy.sum(y == 1))
        transmissionEdges = numpy.zeros((numTransmissions, 2), int)
        j = 0

        #Now, update the vertices to reflect transfer
        for i, (sender, receiver) in enumerate(possibleTransmissionEdges):
            if y[i] == 1:
                transmissionEdges[j, 0] = sender
                transmissionEdges[j, 1] = receiver

                #The vertices are copied before the receiver's info entry is set below
                self.transmissionGraph.setVertex(int(transmissionEdges[j, 0]), self.graph.getVertex(transmissionEdges[j, 0]))
                self.transmissionGraph.setVertex(int(transmissionEdges[j, 1]), self.graph.getVertex(transmissionEdges[j, 1]))
                self.transmissionGraph.addEdge(int(transmissionEdges[j, 0]), int(transmissionEdges[j, 1]), 1)

                j += 1

                vertex = self.graph.getVertex(receiver)
                vertex[infoIndex] = 1
                self.graph.setVertex(receiver, vertex)

        self.allTransmissionEdges.append(transmissionEdges)
        self.iteration = self.iteration + 1

        return self.graph