def clusterTermDocMatrix(self, initialNBases=200, algorithm='SVD'):
    """Cluster ``self.termDocMatrix`` with a newly created ``NMFCluster``.

    The clusterer is stored on ``self.nmfCluster`` and the requested number
    of bases on ``self.initialNBases``. After clustering, the first two
    dimensions of the clusterer's ``W`` attribute are printed.

    Args:
        initialNBases: Forwarded as the second argument of
            ``NMFCluster.cluster``.
        algorithm: Value assigned to the clusterer's ``algorithm``
            attribute before clustering.
    """
    clusterer = NMFCluster()
    self.nmfCluster = clusterer
    clusterer.algorithm = algorithm

    self.initialNBases = initialNBases
    clusterer.cluster(self.termDocMatrix, self.initialNBases)

    wShape = clusterer.W.shape
    print('W: %d, %d' % (wShape[0], wShape[1]))
def SVDNMFFindGlitchesForSink(self, sink):
    """Cluster the term-document matrix of ``sink`` and report anomalies.

    Steps, in order: fetch the matrix for ``sink``, cluster it with an
    ``NMFCluster`` configured from ``self.args.cluster_algo``, output the
    cluster statistics, run anomaly detection, print every result under a
    'Global Anomaly Ranking:' header, and finally output the code for the
    results.

    Args:
        sink: Passed unchanged to ``self._getTermDocMatrixForSink``.
    """
    matrix = self._getTermDocMatrixForSink(sink)

    # Imported at call time, as in the original implementation.
    from mlutils.clustering.NMFCluster import NMFCluster

    clusterer = NMFCluster()
    clusterer.algorithm = self.args.cluster_algo
    clusterer.cluster(matrix)

    self.outputClusterStatistics(clusterer)
    ranking = self.anomalyDetection(clusterer)

    print('Global Anomaly Ranking:')
    for entry in ranking:
        print(entry)

    self.outputCode(ranking, clusterer)
def main(projectRoot, sinkOfInterest, outputFilename, configuration):
    """Embed, cluster and anomaly-rank the snippets of one sink.

    Builds a term-document matrix with ``sinkSnippetEmbedder``, clusters it
    with an ``NMFCluster`` created from ``configuration``, runs
    ``anomalyDetection`` on the result and optionally pickles the anomaly
    results to disk.

    Args:
        projectRoot: Forwarded to ``sinkSnippetEmbedder`` and
            ``anomalyDetection``.
        sinkOfInterest: Forwarded to ``sinkSnippetEmbedder``.
        outputFilename: Path the anomaly results are pickled to; nothing is
            written when it is ``None``.
        configuration: Forwarded to ``NMFCluster`` and
            ``sinkSnippetEmbedder``.

    Returns:
        None. When the embedder yields ``None`` a message is printed and the
        function returns before clustering.
    """
    nmfCluster = NMFCluster(configuration)
    termDocMatrix = sinkSnippetEmbedder(projectRoot, sinkOfInterest, configuration)

    # Identity check: `== None` would invoke the matrix object's own __eq__,
    # which for array-like types may compare elementwise instead of yielding
    # a single boolean.
    if termDocMatrix is None:
        print("termDocMatrix empty")
        return

    nmfCluster.cluster(termDocMatrix)

    # Disabled in the original source:
    # basis_vector_printing_thresh = 0.1
    # nmfCluster.printPrototypes(basis_vector_printing_thresh)

    anomalyResults = anomalyDetection(projectRoot, nmfCluster)

    if outputFilename is not None:
        # The context manager closes the handle even if pickling fails;
        # binary mode is what the pickle documentation recommends.
        with open(outputFilename, 'wb') as outputFile:
            pickle.dump(anomalyResults, outputFile)
def SVDNMFFindGlitchesForSink(self, sink):
    """Cluster the term-document matrix of ``sink`` and report anomalies.

    Steps, in order: fetch the matrix for ``sink``, cluster it with an
    ``NMFCluster`` configured from ``self.args.cluster_algo``, output the
    cluster statistics, run anomaly detection, print every result under a
    'Global Anomaly Ranking:' header, and finally output the code for the
    results.

    Args:
        sink: Passed unchanged to ``self._getTermDocMatrixForSink``.
    """
    matrix = self._getTermDocMatrixForSink(sink)

    # Imported at call time, as in the original implementation.
    from mlutils.clustering.NMFCluster import NMFCluster

    clusterer = NMFCluster()
    clusterer.algorithm = self.args.cluster_algo
    clusterer.cluster(matrix)

    self.outputClusterStatistics(clusterer)
    ranking = self.anomalyDetection(clusterer)

    print('Global Anomaly Ranking:')
    for entry in ranking:
        print(entry)

    self.outputCode(ranking, clusterer)
def main(projectRoot, sinkOfInterest, outputFilename, configuration):
    """Embed, cluster and anomaly-rank the snippets of one sink.

    Builds a term-document matrix with ``sinkSnippetEmbedder``, clusters it
    with an ``NMFCluster`` created from ``configuration``, runs
    ``anomalyDetection`` on the result and optionally pickles the anomaly
    results to disk.

    Args:
        projectRoot: Forwarded to ``sinkSnippetEmbedder`` and
            ``anomalyDetection``.
        sinkOfInterest: Forwarded to ``sinkSnippetEmbedder``.
        outputFilename: Path the anomaly results are pickled to; nothing is
            written when it is ``None``.
        configuration: Forwarded to ``NMFCluster`` and
            ``sinkSnippetEmbedder``.

    Returns:
        None. When the embedder yields ``None`` a message is printed and the
        function returns before clustering.
    """
    nmfCluster = NMFCluster(configuration)
    termDocMatrix = sinkSnippetEmbedder(projectRoot, sinkOfInterest, configuration)

    # Identity check: `== None` would invoke the matrix object's own __eq__,
    # which for array-like types may compare elementwise instead of yielding
    # a single boolean.
    if termDocMatrix is None:
        print('termDocMatrix empty')
        return

    nmfCluster.cluster(termDocMatrix)

    # Disabled in the original source:
    # basis_vector_printing_thresh = 0.1
    # nmfCluster.printPrototypes(basis_vector_printing_thresh)

    anomalyResults = anomalyDetection(projectRoot, nmfCluster)

    if outputFilename is not None:
        # The context manager closes the handle even if pickling fails;
        # binary mode is what the pickle documentation recommends.
        with open(outputFilename, 'wb') as outputFile:
            pickle.dump(anomalyResults, outputFile)
class DimReductionRanker(Ranker):
    """Ranker whose scores come from the error reported by ``NMFCluster``.

    ``loadTermDocMatrix`` and ``rankingFromScores`` are not defined in this
    class; presumably they are provided by ``Ranker`` -- confirm against the
    base class.
    """

    def clusterTermDocMatrix(self, initialNBases=200, algorithm='SVD'):
        """Cluster ``self.termDocMatrix`` with a newly created ``NMFCluster``.

        Stores the clusterer on ``self.nmfCluster`` and the base count on
        ``self.initialNBases``, then prints the first two dimensions of the
        clusterer's ``W`` attribute.

        Args:
            initialNBases: Forwarded as the second argument of
                ``NMFCluster.cluster``.
            algorithm: Value assigned to the clusterer's ``algorithm``
                attribute before clustering.
        """
        clusterer = NMFCluster()
        self.nmfCluster = clusterer
        clusterer.algorithm = algorithm

        self.initialNBases = initialNBases
        clusterer.cluster(self.termDocMatrix, self.initialNBases)

        wShape = clusterer.W.shape
        print('W: %d, %d' % (wShape[0], wShape[1]))

    def determineNMFErrorVecs(self):
        """Return the sum of squared error entries along axis 0.

        Returns:
            The first element of the nested list built from the axis-0 sums
            of the squared error returned by ``self.nmfCluster.getError()``.
        """
        errorMatrix = self.nmfCluster.getError()
        squaredSums = numpy.square(errorMatrix).sum(axis=0)
        # NOTE(review): taking element [0] assumes the sum is still 2-D
        # (e.g. a numpy.matrix row) -- confirm the type getError() returns.
        nested = squaredSums.tolist()
        return nested[0]

    def rank(self, initialNBases=200, algorithm='SVD'):
        """Load the matrix, cluster it and rank by the error scores.

        Args:
            initialNBases: Forwarded to ``clusterTermDocMatrix``.
            algorithm: Forwarded to ``clusterTermDocMatrix``.

        Returns:
            Whatever ``self.rankingFromScores`` produces for the scores.
        """
        self.loadTermDocMatrix()
        self.clusterTermDocMatrix(initialNBases, algorithm)
        return self.rankingFromScores(self.determineNMFErrorVecs())
class DimReductionRanker(Ranker):
    """Ranker whose scores come from the error reported by ``NMFCluster``.

    ``loadTermDocMatrix`` and ``rankingFromScores`` are not defined in this
    class; presumably they are provided by ``Ranker`` -- confirm against the
    base class.
    """

    def clusterTermDocMatrix(self, initialNBases=200, algorithm='SVD'):
        """Cluster ``self.termDocMatrix`` with a newly created ``NMFCluster``.

        Stores the clusterer on ``self.nmfCluster`` and the base count on
        ``self.initialNBases``, then prints the first two dimensions of the
        clusterer's ``W`` attribute.

        Args:
            initialNBases: Forwarded as the second argument of
                ``NMFCluster.cluster``.
            algorithm: Value assigned to the clusterer's ``algorithm``
                attribute before clustering.
        """
        clusterer = NMFCluster()
        self.nmfCluster = clusterer
        clusterer.algorithm = algorithm

        self.initialNBases = initialNBases
        clusterer.cluster(self.termDocMatrix, self.initialNBases)

        wShape = clusterer.W.shape
        print('W: %d, %d' % (wShape[0], wShape[1]))

    def determineNMFErrorVecs(self):
        """Return the sum of squared error entries along axis 0.

        Returns:
            The first element of the nested list built from the axis-0 sums
            of the squared error returned by ``self.nmfCluster.getError()``.
        """
        errorMatrix = self.nmfCluster.getError()
        squaredSums = numpy.square(errorMatrix).sum(axis=0)
        # NOTE(review): taking element [0] assumes the sum is still 2-D
        # (e.g. a numpy.matrix row) -- confirm the type getError() returns.
        nested = squaredSums.tolist()
        return nested[0]

    def rank(self, initialNBases=200, algorithm='SVD'):
        """Load the matrix, cluster it and rank by the error scores.

        Args:
            initialNBases: Forwarded to ``clusterTermDocMatrix``.
            algorithm: Forwarded to ``clusterTermDocMatrix``.

        Returns:
            Whatever ``self.rankingFromScores`` produces for the scores.
        """
        self.loadTermDocMatrix()
        self.clusterTermDocMatrix(initialNBases, algorithm)
        return self.rankingFromScores(self.determineNMFErrorVecs())
def clusterTermDocMatrix(self, initialNBases=200, algorithm='SVD'):
    """Cluster ``self.termDocMatrix`` with a newly created ``NMFCluster``.

    The clusterer is stored on ``self.nmfCluster`` and the requested number
    of bases on ``self.initialNBases``. After clustering, the first two
    dimensions of the clusterer's ``W`` attribute are printed.

    Args:
        initialNBases: Forwarded as the second argument of
            ``NMFCluster.cluster``.
        algorithm: Value assigned to the clusterer's ``algorithm``
            attribute before clustering.
    """
    clusterer = NMFCluster()
    self.nmfCluster = clusterer
    clusterer.algorithm = algorithm

    self.initialNBases = initialNBases
    clusterer.cluster(self.termDocMatrix, self.initialNBases)

    wShape = clusterer.W.shape
    print('W: %d, %d' % (wShape[0], wShape[1]))