def __init__(self, kernel, tau1, tau2):
    """
    Store a kernel together with two non-negative parameters.

    :param kernel: the kernel object; must be an instance of AbstractKernel.

    :param tau1: first parameter, validated as a float in [0, inf].
    :type tau1: :class:`float`

    :param tau2: second parameter, validated as a float in [0, inf].
    :type tau2: :class:`float`
    """
    unbounded = float('inf')
    Parameter.checkFloat(tau1, 0.0, unbounded)
    Parameter.checkFloat(tau2, 0.0, unbounded)
    Parameter.checkClass(kernel, AbstractKernel)

    # The three attributes are independent of each other.
    self.kernel = kernel
    self.tau1 = tau1
    self.tau2 = tau2
def generateGraph(self, alpha, p, dim):
    """
    Add random edges to self.graph based on random vertex positions, then
    pass the graph through an Erdos-Renyi generator.

    Every vertex is given a uniformly random position in [0, 1)^dim (stored
    in self.X). A matrix D is obtained from KernelUtils.computeDistanceMatrix
    applied to the Gram matrix X X^T, and the pair (i, j), i != j, is passed
    to self.graph.addEdge whenever an independent uniform draw is at most
    exp(-alpha * D[i, j]).

    :param alpha: decay rate applied to D, validated as a float in [0, inf].
    :type alpha: :class:`float`

    :param p: probability handed to ErdosRenyiGenerator, a float in [0, 1].
    :type p: :class:`float`

    :param dim: dimension of the random vertex positions.
    :type dim: :class:`int`

    :returns: the graph returned by ErdosRenyiGenerator.generate.
    """
    Parameter.checkFloat(alpha, 0.0, float('inf'))
    Parameter.checkFloat(p, 0.0, 1.0)
    Parameter.checkInt(dim, 0, float('inf'))

    numVertices = self.graph.getNumVertices()
    self.X = numpy.random.rand(numVertices, dim)
    D = KernelUtils.computeDistanceMatrix(numpy.dot(self.X, self.X.T))
    P = numpy.exp(-alpha * D)

    # Zero the diagonal so that no self-edges can be drawn.
    diagIndices = numpy.arange(numVertices)
    P[diagIndices, diagIndices] = 0.0

    # Each (i, j) entry is compared against its own uniform draw, so B is in
    # general NOT symmetric; every nonzero entry is passed to addEdge.
    B = numpy.random.rand(numVertices, numVertices) <= P

    # Compute the nonzero coordinates once rather than on every iteration.
    rowInds, colInds = numpy.nonzero(B)
    for v1, v2 in zip(rowInds, colInds):
        self.graph.addEdge(v1, v2)

    erdosRenyiGenerator = ErdosRenyiGenerator(p)
    self.graph = erdosRenyiGenerator.generate(self.graph, False)

    return self.graph
def setWeight(self, weight):
    """
    Set the weight placed on the positive examples.

    :param weight: weight on the positive examples, between 0 and 1; the negative weight is 1-weight.
    :type weight: :class:`float`
    """
    lowest, highest = 0.0, 1.0
    Parameter.checkFloat(weight, lowest, highest)
    self.weight = weight
def setRandomInfected(self, numInitialInfected, proportionHetero, t=0.0):
    """
    Infect a random selection of people at time t.

    int(numInitialInfected*proportionHetero) of the selection is drawn from
    the heterosexual vertices and the remainder from the bisexual vertices.
    If a group has fewer members than requested, every member of that group
    is infected.

    :param numInitialInfected: number of people to infect, between 0 and self.size.
    :type numInitialInfected: :class:`int`

    :param proportionHetero: fraction of the infected who are heterosexual, in [0, 1].
    :type proportionHetero: :class:`float`

    :param t: the infection time passed to self.vlist.setInfected.
    :type t: :class:`float`
    """
    Parameter.checkInt(numInitialInfected, 0, self.size)
    Parameter.checkFloat(proportionHetero, 0.0, 1.0)

    orientations = self.vlist.V[:, HIVVertices.orientationIndex]
    allInds = numpy.arange(self.size)
    heteroInds = allInds[orientations == HIVVertices.hetero]
    biInds = allInds[orientations == HIVVertices.bi]

    numHetero = int(numInitialInfected*proportionHetero)
    numBi = numInitialInfected - numHetero

    # Both random draws happen before anybody is infected.
    heteroChoice = numpy.random.permutation(heteroInds.shape[0])[:numHetero]
    biChoice = numpy.random.permutation(biInds.shape[0])[:numBi]

    for person in heteroInds[heteroChoice]:
        self.vlist.setInfected(person, t)

    for person in biInds[biChoice]:
        self.vlist.setInfected(person, t)
def setP(self, p):
    """
    Set the edge probability.

    :param p: the probability of an edge, between 0 and 1.
    :type p: :class:`float`
    """
    lowest, highest = 0.0, 1.0
    Parameter.checkFloat(p, lowest, highest)
    self.p = p
def setErrorCost(self, errorCost):
    """
    Set the penalty on errors on positive labels; the penalty for negative
    labels is 1.

    :param errorCost: the penalty, validated as a float in [0, 1].
    :type errorCost: :class:`float`
    """
    lowest, highest = 0.0, 1.0
    Parameter.checkFloat(errorCost, lowest, highest)
    self.errorCost = errorCost
def shuffleSplit(repetitions, numExamples, trainProportion=None):
    """
    Random permutation cross-validation iterator. The training set is sampled
    without replacement and, by default, contains (repetitions-1)/repetitions
    of the examples (rounded down); the test set is the remaining examples.
    Each repetition is sampled independently.

    :param repetitions: The number of repetitions to perform.
    :type repetitions: :class:`int`

    :param numExamples: The number of examples.
    :type numExamples: :class:`int`

    :param trainProportion: The size of the training set relative to numExamples, between 0 and 1, or None to use (repetitions-1)/repetitions.
    :type trainProportion: :class:`float`

    :returns: A list of (trainInds, testInds) tuples, one per repetition.
    """
    Parameter.checkInt(numExamples, 2, float('inf'))
    Parameter.checkInt(repetitions, 1, float('inf'))

    # The training size is used as a slice bound and so must be an integer:
    # floor division and int() truncation keep it one.
    if trainProportion is not None:
        Parameter.checkFloat(trainProportion, 0.0, 1.0)
        trainSize = int(trainProportion*numExamples)
    else:
        trainSize = (repetitions-1)*numExamples//repetitions

    idx = []
    for _ in range(repetitions):
        inds = numpy.random.permutation(numExamples)
        idx.append((inds[:trainSize], inds[trainSize:]))

    return idx
def setSampleSize(self, sampleSize):
    """
    Set the sample size used for each tree.

    :param sampleSize: the amount to randomly sample for each tree. It is validated as a float in [0, 1], so presumably a proportion of the examples rather than a count - confirm against the code reading self.sampleSize.
    :type sampleSize: :class:`float`
    """
    lowest, highest = 0.0, 1.0
    Parameter.checkFloat(sampleSize, lowest, highest)
    self.sampleSize = sampleSize
def binaryBootstrapError(testY, predTestY, trainY, predTrainY, weight):
    """
    Combine test and training binary errors for use with a bootstrap method,
    returning weight*testErr + (1-weight)*trainErr.

    :param testY: the true test labels.
    :param predTestY: the predicted test labels.
    :param trainY: the true training labels.
    :param predTrainY: the predicted training labels.

    :param weight: the weight on the test error, between 0 and 1.
    :type weight: :class:`float`
    """
    Parameter.checkFloat(weight, 0.0, 1.0)

    testTerm = weight*Evaluator.binaryError(testY, predTestY)
    trainTerm = (1-weight)*Evaluator.binaryError(trainY, predTrainY)
    return testTerm + trainTerm
def __init__(self, kernelX, tau1, tau2):
    """
    Store a kernel on the X examples together with two parameters.

    :param kernelX: the kernel object; must be an instance of AbstractKernel.

    :param tau1: first parameter, validated as a float in [0, 1].
    :type tau1: :class:`float`

    :param tau2: second parameter, validated as a float in [0, 1].
    :type tau2: :class:`float`
    """
    lowest, highest = 0.0, 1.0
    Parameter.checkFloat(tau1, lowest, highest)
    Parameter.checkFloat(tau2, lowest, highest)
    Parameter.checkClass(kernelX, AbstractKernel)

    self.tau1 = tau1
    self.tau2 = tau2
    self.kernelX = kernelX
def __init__(self, tau):
    """
    Create the object with a regularisation parameter.

    :param tau: the regularisation parameter, between 0 (no regularisation) and 1 (full regularisation).
    :type tau: :class:`float`
    """
    lowest, highest = 0.0, 1.0
    Parameter.checkFloat(tau, lowest, highest)
    self.tau = tau
def setP(self, p):
    """
    Set the rewiring probability.

    :param p: the probability of rewiring an edge, between 0 and 1.
    :type p: :class:`float`
    """
    lowest, highest = 0.0, 1.0
    Parameter.checkFloat(p, lowest, highest)
    self.p = p
def setC(self, C):
    """
    Set the C parameter and refresh the dependent parameters.

    :param C: the new value, validated as a float in [0, inf].
    :type C: :class:`float`
    """
    # Imported only as an availability check: any import failure propagates
    # to the caller unchanged.
    from sklearn.svm import SVC

    Parameter.checkFloat(C, 0.0, float('inf'))
    self.C = C
    self.__updateParams()
def setInfected(self, vertexInd, time):
    """
    Mark a vertex as infected and record the infection time.

    :param vertexInd: index of the vertex, between 0 and the number of vertices.
    :type vertexInd: :class:`int`

    :param time: the infection time, validated as a float in [0, inf].
    :type time: :class:`float`

    :raises ValueError: if the vertex is already in the infected state.
    """
    Parameter.checkIndex(vertexInd, 0, self.getNumVertices())
    Parameter.checkFloat(time, 0.0, float('inf'))

    currentState = self.V[vertexInd, HIVVertices.stateIndex]
    if currentState == HIVVertices.infected:
        raise ValueError("Person is already infected")

    self.V[vertexInd, HIVVertices.stateIndex] = HIVVertices.infected
    self.V[vertexInd, HIVVertices.infectionTimeIndex] = time
def setB(self, b):
    """
    Set the b parameter.

    :param b: kernel bias parameter, validated as a float in [0, inf].
    :type b: :class:`float`
    """
    unbounded = float('inf')
    Parameter.checkFloat(b, 0.0, unbounded)
    self.b = b
def createTruncNormParam(self, sigma, mode):
    """
    Create a sampler and a density for a normal distribution truncated to
    the interval [0, 1].

    :param sigma: the scale of the underlying normal, a float in (0, 1].
    :type sigma: :class:`float`

    :param mode: the location of the underlying normal, a float in [0, inf].
    :type mode: :class:`float`

    :returns: A pair (priorDist, priorDensity): priorDist() draws one sample and priorDensity(x) evaluates the density at x.

    :raises ValueError: if sigma is zero.
    """
    Parameter.checkFloat(sigma, 0.0, 1.0)
    Parameter.checkFloat(mode, 0.0, float('inf'))

    # sigma is a divisor below, so reject zero explicitly instead of failing
    # with a division error (or producing NaN bounds for numpy floats).
    if sigma == 0.0:
        raise ValueError("Sigma cannot be zero")

    # scipy expresses the truncation bounds in units of scale relative to
    # loc, so these correspond to the interval [0, 1].
    a = -mode/sigma
    b = (1-mode)/sigma

    priorDist = lambda: stats.truncnorm.rvs(a, b, loc=mode, scale=sigma)
    priorDensity = lambda x: stats.truncnorm.pdf(x, a, b, loc=mode, scale=sigma)

    return priorDist, priorDensity
def createDiscTruncNormParam(self, sigma, mode, upper, lower=0):
    """
    Create a sampler and a density for a discrete truncated normal
    parameter on the interval [lower, upper].

    Samples are drawn from the continuous truncated normal and then rounded;
    the returned density is that of the continuous distribution.

    :param sigma: the scale of the underlying normal, a float in (0, inf].
    :type sigma: :class:`float`

    :param mode: the location of the underlying normal, a float in [0, inf].
    :type mode: :class:`float`

    :param upper: the upper truncation point.

    :param lower: the lower truncation point.

    :returns: A pair (priorDist, priorDensity): priorDist() draws one rounded sample and priorDensity(x) evaluates the continuous density at x.

    :raises ValueError: if sigma is zero.
    """
    Parameter.checkFloat(sigma, 0.0, float('inf'))
    Parameter.checkFloat(mode, 0.0, float('inf'))

    # sigma is a divisor below, so reject zero explicitly instead of failing
    # with a division error (or producing NaN bounds for numpy floats).
    if sigma == 0.0:
        raise ValueError("Sigma cannot be zero")

    # scipy expresses the truncation bounds in units of scale relative to loc.
    a = (lower-mode)/sigma
    b = (upper-mode)/sigma

    priorDist = lambda: round(stats.truncnorm.rvs(a, b, loc=mode, scale=sigma))
    priorDensity = lambda x: stats.truncnorm.pdf(x, a, b, loc=mode, scale=sigma)

    return priorDist, priorDensity
def setDetected(self, vertexInd, time, detectionType):
    """
    Mark an infected vertex as removed, recording when and how it was
    detected.

    :param vertexInd: index of the vertex, between 0 and the number of vertices.
    :type vertexInd: :class:`int`

    :param time: the detection time, validated as a float in [0, inf].
    :type time: :class:`float`

    :param detectionType: HIVVertices.randomDetect or HIVVertices.contactTrace.

    :raises ValueError: if detectionType is not one of the two allowed values, or if the vertex is not currently infected.
    """
    Parameter.checkIndex(vertexInd, 0, self.getNumVertices())
    Parameter.checkFloat(time, 0.0, float('inf'))

    allowedTypes = [HIVVertices.randomDetect, HIVVertices.contactTrace]
    if detectionType not in allowedTypes:
        raise ValueError("Invalid detection type : " + str(detectionType))

    currentState = self.V[vertexInd, HIVVertices.stateIndex]
    if currentState != HIVVertices.infected:
        raise ValueError("Person must be infected to be detected")

    self.V[vertexInd, HIVVertices.stateIndex] = HIVVertices.removed
    self.V[vertexInd, HIVVertices.detectionTimeIndex] = time
    self.V[vertexInd, HIVVertices.detectionTypeIndex] = detectionType
def setSigma(self, sigma):
    """
    Set the sigma parameter.

    :param sigma: kernel width parameter, a non-zero float in [0, inf].
    :type sigma: :class:`float`

    :raises ValueError: if sigma is zero.
    """
    unbounded = float('inf')
    Parameter.checkFloat(sigma, 0.0, unbounded)

    isZero = sigma == 0.0
    if isZero:
        raise ValueError("Sigma cannot be zero")

    self.sigma = sigma
def createGammaParam(self, sigma, mu):
    """
    Create a sampler and a density for a gamma distribution with mean mu and
    standard deviation sigma.

    The scale is theta = sigma**2/mu and the shape is k = mu/theta, with k
    capped at 1000 (so for very small sigma the requested moments are not
    matched exactly).

    :param sigma: the standard deviation, a float in (0, inf].
    :type sigma: :class:`float`

    :param mu: the mean, a float in (0, inf].
    :type mu: :class:`float`

    :returns: A pair (priorDist, priorDensity): priorDist() draws one sample and priorDensity(x) evaluates the density at x.

    :raises ValueError: if mu or sigma is zero.
    """
    Parameter.checkFloat(sigma, 0.0, float('inf'))
    Parameter.checkFloat(mu, 0.0, float('inf'))

    if mu == 0.0:
        raise ValueError("Gamma distribution cannot have mean zero.")

    # sigma == 0 would give theta == 0 and a division by zero in mu/theta.
    if sigma == 0.0:
        raise ValueError("Gamma distribution cannot have standard deviation zero.")

    theta = sigma**2/mu
    k = mu/theta
    k = min(k, 1000)

    priorDist = lambda: stats.gamma.rvs(k, scale=theta)
    priorDensity = lambda x: stats.gamma.pdf(x, k, scale=theta)

    return priorDist, priorDensity
def createGammaParam(self, sigma, mu):
    """
    Create a sampler and a density for a gamma distribution with mean mu and
    standard deviation sigma.

    The scale is theta = sigma**2/mu and the shape is k = mu/theta. If k
    exceeds self.maxK it is clipped to self.maxK and a warning is logged (so
    for very small sigma the requested moments are not matched exactly).

    :param sigma: the standard deviation, a float in (0, inf].
    :type sigma: :class:`float`

    :param mu: the mean, a float in (0, inf].
    :type mu: :class:`float`

    :returns: A pair (priorDist, priorDensity): priorDist() draws one sample and priorDensity(x) evaluates the density at x.

    :raises ValueError: if mu or sigma is zero.
    """
    Parameter.checkFloat(sigma, 0.0, float('inf'))
    Parameter.checkFloat(mu, 0.0, float('inf'))

    if mu == 0.0:
        raise ValueError("Gamma distribution cannot have mean zero.")

    # sigma == 0 would give theta == 0 and a division by zero in mu/theta.
    if sigma == 0.0:
        raise ValueError("Gamma distribution cannot have standard deviation zero.")

    theta = sigma**2/mu
    k = mu/theta

    if k > self.maxK:
        # Assign (not compare) so that the clipping actually takes effect.
        k = self.maxK
        logging.warning("k for gamma distribution > " + str(self.maxK) + ", clipping")

    priorDist = lambda: stats.gamma.rvs(k, scale=theta)
    priorDensity = lambda x: stats.gamma.pdf(x, k, scale=theta)

    return priorDist, priorDensity
def __init__(self, algorithm="PATH", alpha=0.5, featureInds=None, useWeightM=True):
    """
    Create the matching object.

    :param algorithm: the name of the matching algorithm.
    :type algorithm: :class:`str`

    :param alpha: trade-off between matching adjacency matrices and vertex labels, between 0 and 1.
    :type alpha: :class:`float`

    :param featureInds: an optional array of indices to use for label matching.

    :param useWeightM: stored as self.useWeightM.
    """
    Parameter.checkFloat(alpha, 0.0, 1.0)
    Parameter.checkClass(algorithm, str)

    # Caller-supplied settings
    self.algorithm = algorithm
    self.alpha = alpha
    self.featureInds = featureInds
    self.useWeightM = useWeightM

    # Fixed settings
    self.maxInt = 10**9
    # Gamma is the same as dummy_nodes_c_coef for costing added vertex labels
    self.gamma = 0.0
    # Same as dummy_nodes_fill
    self.rho = 0.5
def maxBudgetedInfluence(self, P, u, L):
    """
    Greedy selection for the budgeted maximum influence problem.

    The rows of P are divided by the costs u. On each pass the unselected
    index giving the largest total activation (above the best seen so far)
    whose cost still fits in the budget is chosen; the search stops when a
    pass finds no such index. This algorithm has an unbounded approximation
    ratio.

    :param P: the activation matrix, one row per vertex.

    :param u: the cost of selecting each vertex.

    :param L: the total budget, validated as a float in [0, inf].
    :type L: :class:`float`

    :returns: The list of selected indices, in order of selection.
    """
    Parameter.checkFloat(L, 0.0, float('inf'))

    Q = (P.T/u).T
    numVertices = P.shape[0]

    selectedIndices = []
    unselectedIndices = set(range(0, numVertices))
    bestActivations = numpy.zeros(numVertices)
    bestTotalActivation = 0
    currentBudget = 0

    def combine(row, current):
        # Elementwise maximum of a candidate row and the current activations.
        return numpy.max(numpy.r_['0,2', row, current], 0)

    while True:
        bestIndex = -1
        logging.debug("Budget remaining: " + str(L - currentBudget))

        for candidate in unselectedIndices:
            candidateTotal = numpy.sum(combine(Q[candidate, :], bestActivations))

            if candidateTotal > bestTotalActivation and currentBudget + u[candidate] <= L:
                bestIndex = candidate
                bestTotalActivation = numpy.sum(candidateTotal)

        # No affordable candidate improved the total: we are done.
        if bestIndex == -1:
            break

        bestActivations = combine(Q[bestIndex, :], bestActivations)
        selectedIndices.append(bestIndex)
        unselectedIndices.remove(bestIndex)
        currentBudget = currentBudget + u[bestIndex]

    return selectedIndices
def localAuc(testY, predY, u):
    """
    Compute the local AUC measure for a given ROC curve. The parameter u is
    the proportion of best instances to use, u = P(s(X) > t).

    :param testY: the true labels.

    :param predY: the predicted scores.
    :type predY: :class:`ndarray`

    :param u: the proportion of best instances, between 0 and 1.
    :type u: :class:`float`

    :returns: The local AUC value.
    """
    Parameter.checkFloat(u, 0.0, 1.0)
    fpr, tpr = Evaluator.roc(testY, predY)

    # numpy.floor returns a float; array indices must be integers.
    minExampleIndex = int(numpy.floor((predY.shape[0]-1)*u))
    minExampleScore = numpy.flipud(numpy.sort(predY))[minExampleIndex]

    # numpy.unique already returns its values sorted.
    uniqueScores = numpy.unique(predY)
    intersectInd = numpy.searchsorted(uniqueScores, minExampleScore)
    intersectInd = uniqueScores.shape[0] - intersectInd

    alpha = fpr[intersectInd]
    beta = tpr[intersectInd]

    # Trapezoidal area under the curve up to the cut-off point, plus the
    # rectangle of height beta to the right of it.
    localAuc = numpy.sum(0.5*numpy.diff(fpr[0:intersectInd])*(tpr[0:max(intersectInd-1, 0)] + tpr[1:intersectInd]))
    localAuc += beta*(1-alpha)

    return localAuc
def __init__(self, kernelX, kernelY, tau):
    """
    Create the object from kernels (objects instantiating a subclass of
    AbstractKernel) on the X and Y spaces and a regularisation parameter tau
    between 0 (no regularisation) and 1 (full regularisation).

    :param kernelX: The kernel object on the X examples.
    :type kernelX: :class:`apgl.kernel.AbstractKernel`

    :param kernelY: The kernel object on the Y examples.
    :type kernelY: :class:`apgl.kernel.AbstractKernel`

    :param tau: The regularisation parameter between 0 and 1.
    :type tau: :class:`float`
    """
    lowest, highest = 0.0, 1.0
    Parameter.checkFloat(tau, lowest, highest)
    Parameter.checkClass(kernelX, AbstractKernel)
    Parameter.checkClass(kernelY, AbstractKernel)

    self.tau = tau
    self.kernelX = kernelX
    self.kernelY = kernelY
def effectiveDiameter(self, q, P=None):
    """
    The effective diameter is the minimum d such that for a fraction q of
    reachable node pairs, the path length is at most d. This is more robust
    than the standard diameter method. One can optionally pass in a matrix P
    whose ijth entry is the shortest path from i to j.

    :param q: The fraction of node pairs to consider.
    :type q: :class:`float`

    :param P: An optional nxn matrix whose ijth entry is the shortest path from i to j. It is copied, not modified.
    :type P: :class:`ndarray`

    :returns: The effective diameter of this graph (0 if there are no edges or no reachable pairs).

    :raises ValueError: if P is given but is not an ndarray of shape (numVertices, numVertices).
    """
    Parameter.checkFloat(q, 0.0, 1.0)

    numVertices = self.getNumVertices()
    # Compare with None by identity: "P != None" on an ndarray is elementwise
    # and cannot be used as a truth value.
    if P is not None and (type(P) != numpy.ndarray or P.shape != (numVertices, numVertices)):
        raise ValueError("P must be array of same size as weight matrix of graph")

    if self.getNumEdges() == 0:
        return 0

    if P is None:
        P = self.floydWarshall(False)
    else:
        # Work on a copy so the caller's matrix is left untouched.
        P = P.copy()

    # Paths from a vertex to itself are ignored
    P[numpy.diag_indices(P.shape[0])] = float('inf')

    paths = numpy.sort(P[P != float('inf')])

    if paths.shape[0] != 0:
        # numpy.floor returns a float; array indices must be integers.
        ind = int(numpy.floor((paths.shape[0]-1)*q))
        return int(paths[ind])
    else:
        return 0.0
def testCheckFloat(self):
    """
    Check that Parameter.checkFloat accepts floats inside an inclusive range
    and raises ValueError for reversed bounds, non-float arguments and
    out-of-range values.
    """
    lower = 0.0
    upper = 5.0
    value = 2.0

    # Values inside the range, including both end points and a point range.
    accepted = [
        (value, lower, upper),
        (lower, lower, upper),
        (upper, lower, upper),
        (value, value, value),
    ]
    for args in accepted:
        Parameter.checkFloat(*args)

    # Reversed bounds, integer bounds, integer values and out-of-range values.
    rejected = [
        (value, upper, lower),
        (value, int(lower), upper),
        (value, lower, int(upper)),
        (2, lower, upper),
        (-1, lower, upper),
        (6, lower, upper),
    ]
    for args in rejected:
        self.assertRaises(ValueError, Parameter.checkFloat, *args)

    #Check half ranges such as [0, inf]
    Parameter.checkFloat(value, lower, float("inf"))
    Parameter.checkFloat(value, float("-inf"), upper)

    #Check use of numpy float64
    lower = numpy.float64(0.0)
    upper = numpy.float64(5.0)
    value = numpy.float64(2.0)

    acceptedNumpy = [
        (value, lower, upper),
        (lower, lower, upper),
        (upper, lower, upper),
        (value, value, value),
    ]
    for args in acceptedNumpy:
        Parameter.checkFloat(*args)
def setSampleSize(self, sampleSize):
    """
    Set the sample size.

    :param sampleSize: the sample size, validated as a float in [0, 1].
    :type sampleSize: :class:`float`
    """
    Parameter.checkFloat(sampleSize, 0.0, 1.0)
    # Store under sampleSize: assigning to numTrees here silently overwrote
    # an unrelated attribute and left the sample size unset.
    self.sampleSize = sampleSize
def setScore(self, score):
    """
    Set the score.

    :param score: the new score, validated as a float in [0, inf].
    :type score: :class:`float`
    """
    unbounded = float("inf")
    Parameter.checkFloat(score, 0.0, unbounded)
    self.score = score
def setLambda(self, lmbda):
    """
    Set the lmbda parameter.

    :param lmbda: the new value, validated as a float in [0, inf].
    :type lmbda: :class:`float`
    """
    unbounded = float('inf')
    Parameter.checkFloat(lmbda, 0.0, unbounded)
    self.lmbda = lmbda
def setTau1(self, tau1):
    """
    Set the tau1 parameter.

    :param tau1: the new value, validated as a float in [0, inf].
    :type tau1: :class:`float`
    """
    unbounded = float('inf')
    Parameter.checkFloat(tau1, 0.0, unbounded)
    self.tau1 = tau1
def setTermination(self, tol):
    """
    Set the termination tolerance and refresh the dependent parameters.

    :param tol: the tolerance, validated as a float in [0, 1].
    :type tol: :class:`float`
    """
    lowest, highest = 0.0, 1.0
    Parameter.checkFloat(tol, lowest, highest)

    self.tol = tol
    self.__updateParams()