def cluster(self):
    """Cluster the source API symbols of every argument independently.

    Iterates over the per-argument symbol collections returned by
    ``self.contentProvider.getSourceAPISymbols()``; the position of a
    collection in that sequence is used as its argument number.

    * An empty collection is skipped.
    * A collection with exactly one symbol is registered directly via
      ``ClusterResult.register``.
    * Anything larger is clustered hierarchically (``linkage`` with
      ``self.linkageMethod``) and cut into flat clusters with ``fcluster``
      at distance ``self.maxDistInCluster``.

    Returns:
        The ``ClusterResult`` holding everything that was registered.
    """
    # We cluster for each argument independently!
    retval = ClusterResult()

    for argNum, symbolsForArg in enumerate(
            self.contentProvider.getSourceAPISymbols()):
        if len(symbolsForArg) == 0:
            continue

        # Offset handed to register()/registerSet(): the number of cluster
        # ids already present in the result.
        curOffset = len(retval.clusterIdToDatapoint.keys())

        if len(symbolsForArg) == 1:
            retval.register(curOffset, symbolsForArg[0], argNum)
            continue

        # Only build the distance matrix once we know there are at least
        # two symbols; it is neither needed nor meaningful before that.
        D = self._calculateDistanceMatrix(symbolsForArg)
        Z = linkage(D, method=self.linkageMethod)
        clustering = fcluster(Z, self.maxDistInCluster, criterion='distance')
        retval.registerSet(symbolsForArg, clustering, curOffset, argNum)

    return retval
def cluster(self, models):
    """Cluster the invocations of each model, one argument at a time.

    For every model and every argument index below
    ``model.getNumberOfArguments()``, the model's members are embedded with
    a fresh ``ConditionEmbedder``, the external ``joern-cluster`` tool is
    launched, and its tab-separated ``nodeId<TAB>clusterId`` output lines
    are registered on the result via ``ClusterResult.registerSet``.

    The ``embedding`` directory in the current working directory is removed
    after each argument, even when embedding or clustering fails, so that a
    failed run does not leave stale data behind for the next one.

    Args:
        models: iterable of objects providing ``members`` and
            ``getNumberOfArguments()``.

    Returns:
        The populated ``ClusterResult``.
    """
    import shutil

    retval = ClusterResult()

    for model in models:
        for argNum in range(model.getNumberOfArguments()):
            invocs = model.members
            curOffset = len(retval.clusterIdToDatapoint.keys())

            embedder = ConditionEmbedder(self.contentProvider)
            try:
                embedder.embed(invocs, argNum)

                # TODO: we need to be able to pass a distance parameter to joern-cluster
                clusterLines = [x.rstrip() for x in launch('joern-cluster')]

                clustering = []
                datapoints = []
                for line in clusterLines:
                    if not line:
                        # Blank output lines carry no assignment.
                        continue
                    (nodeId, clusterId) = line.split('\t')
                    clustering.append(int(clusterId))
                    datapoints.append(nodeId)

                retval.registerSet(datapoints, clustering, curOffset, argNum)
            finally:
                # Replaces the former `rm -rf embedding` shell call;
                # ignore_errors keeps the "-f" semantics for a missing dir.
                shutil.rmtree('embedding', ignore_errors=True)

    return retval
def cluster(self, models):
    """Run ``joern-cluster`` for each argument of each model and collect the results.

    For every ``model`` in ``models`` and every argument number in
    ``range(model.getNumberOfArguments())``:

    1. ``model.members`` is embedded for that argument by a new
       ``ConditionEmbedder`` built from ``self.contentProvider``.
    2. ``joern-cluster`` is launched; each non-empty output line is parsed
       as ``nodeId<TAB>clusterId``.
    3. The parsed node ids and integer cluster ids are passed to
       ``ClusterResult.registerSet`` together with the current offset and
       the argument number.
    4. The ``embedding`` directory (relative to the working directory) is
       deleted, whether or not the previous steps succeeded.

    Args:
        models: iterable of objects providing ``members`` and
            ``getNumberOfArguments()``.

    Returns:
        The ``ClusterResult`` with all registered cluster sets.
    """
    import shutil

    retval = ClusterResult()

    for model in models:
        for argNum in range(model.getNumberOfArguments()):
            invocs = model.members
            # Offset handed to registerSet(): number of cluster ids so far.
            curOffset = len(retval.clusterIdToDatapoint.keys())

            embedder = ConditionEmbedder(self.contentProvider)
            try:
                embedder.embed(invocs, argNum)

                # TODO: we need to be able to pass a distance parameter to joern-cluster
                clusterLines = [x.rstrip() for x in launch("joern-cluster")]

                clustering = []
                datapoints = []
                for line in clusterLines:
                    if not line:
                        # Skip blank lines instead of failing the unpack below.
                        continue
                    (nodeId, clusterId) = line.split("\t")
                    clustering.append(int(clusterId))
                    datapoints.append(nodeId)

                retval.registerSet(datapoints, clustering, curOffset, argNum)
            finally:
                # Always clean up, without spawning a shell for `rm -rf`.
                shutil.rmtree("embedding", ignore_errors=True)

    return retval
def cluster(self):
    """Build a ``ClusterResult`` by clustering each argument's source API symbols.

    ``self.contentProvider.getSourceAPISymbols()`` is expected to yield one
    symbol collection per argument; the index of each collection is passed
    on as the argument number.

    Per collection:
        * no symbols       -> nothing is registered;
        * one symbol       -> registered on its own via ``register``;
        * two or more      -> a distance matrix is computed with
          ``self._calculateDistanceMatrix``, linked with
          ``self.linkageMethod`` and cut by ``fcluster`` using the
          ``'distance'`` criterion at ``self.maxDistInCluster``; the
          resulting assignment is registered via ``registerSet``.

    Returns:
        The ``ClusterResult`` accumulated over all arguments.
    """
    # We cluster for each argument independently!
    retval = ClusterResult()
    symbolsPerArg = self.contentProvider.getSourceAPISymbols()

    for argNum, symbolsForArg in enumerate(symbolsPerArg):
        numSymbols = len(symbolsForArg)
        if numSymbols == 0:
            continue

        # Number of cluster ids registered so far; passed on as the offset.
        curOffset = len(retval.clusterIdToDatapoint.keys())

        if numSymbols == 1:
            retval.register(curOffset, symbolsForArg[0], argNum)
            continue

        # The distance matrix is only computed for collections that are
        # actually clustered (at least two symbols).
        D = self._calculateDistanceMatrix(symbolsForArg)
        Z = linkage(D, method=self.linkageMethod)
        clustering = fcluster(Z, self.maxDistInCluster, criterion='distance')
        retval.registerSet(symbolsForArg, clustering, curOffset, argNum)

    return retval
def cluster(self, sourceClusters):
    """Cluster invocations based on their definition statements and source clusters.

    Stores the definition statements from
    ``self.contentProvider.getAllDefStmtsPerArg()`` in ``self.defStmts`` and
    ``sourceClusters`` in ``self.sClusters``, converts both into a data
    matrix with ``InvocationsToDataMatrix``, and hierarchically clusters the
    rows of the transposed matrix (``pdist`` with ``METRIC``, ``linkage``
    with ``LINKAGE_METHOD``, ``fcluster`` cut at ``self.maxDistInCluster``).

    Args:
        sourceClusters: source clustering passed through to the converter.

    Returns:
        A ``ClusterResult``. When the transposed matrix has fewer than two
        rows there is nothing to cluster and an empty ``ClusterResult`` is
        returned. Otherwise the result carries the number of arguments, the
        registered clustering, ``dataMatrix`` and ``callSiteIds``.
    """
    self.defStmts = self.contentProvider.getAllDefStmtsPerArg()
    self.sClusters = sourceClusters

    converter = InvocationsToDataMatrix()
    dataMatrix = converter.convert(self.defStmts, self.sClusters)

    # pdist() treats each row as one observation. With fewer than two
    # observations it yields no distances and linkage() raises, so bail out
    # early. This covers the previously handled (1, 1) shape as well as a
    # single observation with several features.
    observations = dataMatrix.T
    if observations.shape[0] < 2:
        return ClusterResult()

    D = pdist(observations, METRIC)
    Z = linkage(D, method=LINKAGE_METHOD)
    clustering = fcluster(Z, self.maxDistInCluster, criterion='distance')

    result = ClusterResult()
    numArgs = len(self.defStmts[0]) if len(self.defStmts) > 0 else 0
    result.setNumberOfArguments(numArgs)
    result.registerSet(range(len(self.defStmts)), clustering)
    result.dataMatrix = dataMatrix
    result.callSiteIds = self.contentProvider.getInvocationCallSiteIds()
    return result