Пример #1
0
 def clusterTermDocMatrix(self, initialNBases=200, algorithm='SVD'):
     """Cluster ``self.termDocMatrix`` with a freshly created NMFCluster.

     The new clusterer is stored on ``self.nmfCluster`` and the requested
     base count on ``self.initialNBases``.  Once clustering has run, the
     first two entries of the clusterer's ``W.shape`` are printed.

     :param initialNBases: forwarded as the second argument of
         ``NMFCluster.cluster``.
     :param algorithm: assigned to the clusterer's ``algorithm``
         attribute before clustering.
     """
     clusterer = self.nmfCluster = NMFCluster()
     clusterer.algorithm = algorithm
     self.initialNBases = initialNBases
     clusterer.cluster(self.termDocMatrix, self.initialNBases)

     # NOTE(review): W is presumably the basis matrix produced by the
     # clustering step -- confirm against NMFCluster.
     wShape = clusterer.W.shape
     print('W: %d, %d' % (wShape[0], wShape[1]))
Пример #2
0
 def SVDNMFFindGlitchesForSink(self, sink):
     """Cluster the term-document matrix of ``sink`` and report anomalies.

     Steps, in order: fetch the matrix through
     ``self._getTermDocMatrixForSink``, cluster it with an NMFCluster
     whose ``algorithm`` is taken from ``self.args.cluster_algo``, emit
     cluster statistics, run ``self.anomalyDetection``, print every
     result under a 'Global Anomaly Ranking:' heading and finally hand
     the results to ``self.outputCode``.

     :param sink: passed unchanged to ``self._getTermDocMatrixForSink``.
     """
     sinkMatrix = self._getTermDocMatrixForSink(sink)

     # Imported at call time, exactly where the original imported it.
     from mlutils.clustering.NMFCluster import NMFCluster
     clusterer = NMFCluster()
     clusterer.algorithm = self.args.cluster_algo
     clusterer.cluster(sinkMatrix)

     self.outputClusterStatistics(clusterer)

     ranking = self.anomalyDetection(clusterer)

     print('Global Anomaly Ranking:')
     for entry in ranking:
         print(entry)

     self.outputCode(ranking, clusterer)
Пример #3
0
def main(projectRoot, sinkOfInterest, outputFilename, configuration):
    """Embed the snippets of a sink, cluster them and run anomaly detection.

    :param projectRoot: passed to ``sinkSnippetEmbedder`` and
        ``anomalyDetection``.
    :param sinkOfInterest: passed to ``sinkSnippetEmbedder``.
    :param outputFilename: if not ``None``, the anomaly results are
        pickled to this path.
    :param configuration: passed to ``NMFCluster`` and
        ``sinkSnippetEmbedder``.
    :returns: ``None``; returns early (after printing a message) when the
        embedder yields ``None``.
    """
    nmfCluster = NMFCluster(configuration)

    termDocMatrix = sinkSnippetEmbedder(projectRoot, sinkOfInterest, configuration)
    # Identity test instead of `== None`: presumably termDocMatrix is an
    # array-like matrix (TODO confirm), and equality against None on such
    # objects may be evaluated element-wise rather than as one boolean.
    if termDocMatrix is None:
        print("termDocMatrix empty")
        return

    nmfCluster.cluster(termDocMatrix)
    # basis_vector_printing_thresh = 0.1
    # nmfCluster.printPrototypes(basis_vector_printing_thresh)
    anomalyResults = anomalyDetection(projectRoot, nmfCluster)

    if outputFilename is not None:
        # Context manager guarantees the file is flushed and closed even if
        # pickling fails.  Mode "w" is kept so existing readers of the
        # (default-protocol) pickle keep working.
        with open(outputFilename, "w") as outputFile:
            pickle.dump(anomalyResults, outputFile)
Пример #4
0
    def SVDNMFFindGlitchesForSink(self, sink):
        """Run the clustering-based anomaly pipeline for one sink.

        The term-document matrix obtained from
        ``self._getTermDocMatrixForSink(sink)`` is clustered by an
        NMFCluster configured with ``self.args.cluster_algo``.  Cluster
        statistics are output, ``self.anomalyDetection`` is run on the
        clusterer, each result is printed after a
        'Global Anomaly Ranking:' heading, and the results are passed to
        ``self.outputCode``.

        :param sink: passed unchanged to ``self._getTermDocMatrixForSink``.
        """
        matrixForSink = self._getTermDocMatrixForSink(sink)

        # Function-scope import, kept after the matrix lookup as before.
        from mlutils.clustering.NMFCluster import NMFCluster
        clusterModel = NMFCluster()
        clusterModel.algorithm = self.args.cluster_algo
        clusterModel.cluster(matrixForSink)
        self.outputClusterStatistics(clusterModel)

        rankedAnomalies = self.anomalyDetection(clusterModel)
        print('Global Anomaly Ranking:')
        for anomaly in rankedAnomalies:
            print(anomaly)

        self.outputCode(rankedAnomalies, clusterModel)
Пример #5
0
def main(projectRoot, sinkOfInterest, outputFilename, configuration):
    """Embed the snippets of a sink, cluster them and run anomaly detection.

    :param projectRoot: passed to ``sinkSnippetEmbedder`` and
        ``anomalyDetection``.
    :param sinkOfInterest: passed to ``sinkSnippetEmbedder``.
    :param outputFilename: if not ``None``, the anomaly results are
        pickled to this path.
    :param configuration: passed to ``NMFCluster`` and
        ``sinkSnippetEmbedder``.
    :returns: ``None``; returns early (after printing a message) when the
        embedder yields ``None``.
    """
    nmfCluster = NMFCluster(configuration)

    termDocMatrix = sinkSnippetEmbedder(projectRoot, sinkOfInterest,
                                        configuration)
    # `is None` rather than `== None`: presumably the embedder returns an
    # array-like matrix (TODO confirm), for which equality with None may be
    # computed element-wise and cannot be used as a plain condition.
    if termDocMatrix is None:
        print('termDocMatrix empty')
        return

    nmfCluster.cluster(termDocMatrix)
    # basis_vector_printing_thresh = 0.1
    # nmfCluster.printPrototypes(basis_vector_printing_thresh)
    anomalyResults = anomalyDetection(projectRoot, nmfCluster)

    if outputFilename is not None:
        # The with-block closes the file deterministically; the original
        # left the handle open.  Mode 'w' is unchanged so files stay
        # readable by whatever currently loads them.
        with open(outputFilename, 'w') as outputFile:
            pickle.dump(anomalyResults, outputFile)
Пример #6
0
class DimReductionRanker(Ranker):
    """Ranker whose scores come from the error reported by an NMFCluster."""

    def clusterTermDocMatrix(self, initialNBases=200, algorithm='SVD'):
        """Cluster ``self.termDocMatrix`` and print the shape of ``W``.

        A new NMFCluster is stored on ``self.nmfCluster``; the base count
        is remembered on ``self.initialNBases``.

        :param initialNBases: second argument given to
            ``NMFCluster.cluster``.
        :param algorithm: value assigned to the clusterer's ``algorithm``
            attribute.
        """
        clusterer = self.nmfCluster = NMFCluster()
        clusterer.algorithm = algorithm
        self.initialNBases = initialNBases
        clusterer.cluster(self.termDocMatrix, self.initialNBases)

        wShape = clusterer.W.shape
        print('W: %d, %d' % (wShape[0], wShape[1]))

    def determineNMFErrorVecs(self):
        """Return the first row of the axis-0 sums of the squared error.

        NOTE(review): indexing ``tolist()`` with ``[0]`` suggests the
        error is a numpy.matrix, so the sum is a 1xN row -- confirm.
        """
        errorMatrix = self.nmfCluster.getError()
        squaredSums = numpy.square(errorMatrix).sum(axis=0)
        return squaredSums.tolist()[0]

    def rank(self, initialNBases=200, algorithm='SVD'):
        """Load the matrix, cluster it and turn the error scores into a ranking.

        :param initialNBases: forwarded to ``clusterTermDocMatrix``.
        :param algorithm: forwarded to ``clusterTermDocMatrix``.
        :returns: whatever ``self.rankingFromScores`` returns for the
            scores of ``determineNMFErrorVecs``.
        """
        self.loadTermDocMatrix()
        self.clusterTermDocMatrix(initialNBases, algorithm)
        errorScores = self.determineNMFErrorVecs()
        return self.rankingFromScores(errorScores)
Пример #7
0
class DimReductionRanker(Ranker):
    """Ranks items by the squared error an NMFCluster reports for them."""

    def clusterTermDocMatrix(self, initialNBases=200, algorithm='SVD'):
        """Run a new NMFCluster over ``self.termDocMatrix``.

        Stores the clusterer on ``self.nmfCluster`` and the base count on
        ``self.initialNBases``, then prints the first two entries of
        ``W.shape``.

        :param initialNBases: second argument given to
            ``NMFCluster.cluster``.
        :param algorithm: value assigned to the clusterer's ``algorithm``
            attribute.
        """
        model = self.nmfCluster = NMFCluster()
        model.algorithm = algorithm
        self.initialNBases = initialNBases
        model.cluster(self.termDocMatrix, self.initialNBases)

        dims = model.W.shape
        print('W: %d, %d' % (dims[0], dims[1]))

    def determineNMFErrorVecs(self):
        """Sum the squared clusterer error along axis 0 and return row 0.

        NOTE(review): the ``[0]`` on the nested list presumably relies on
        the error being a numpy.matrix (1xN sum) -- confirm.
        """
        squaredError = numpy.square(self.nmfCluster.getError())
        summedAsList = squaredError.sum(axis=0).tolist()
        return summedAsList[0]

    def rank(self, initialNBases=200, algorithm='SVD'):
        """Produce a ranking from the clustering error of the loaded matrix.

        :param initialNBases: forwarded to ``clusterTermDocMatrix``.
        :param algorithm: forwarded to ``clusterTermDocMatrix``.
        :returns: the result of ``self.rankingFromScores`` applied to the
            scores from ``determineNMFErrorVecs``.
        """
        self.loadTermDocMatrix()
        self.clusterTermDocMatrix(initialNBases, algorithm)
        return self.rankingFromScores(self.determineNMFErrorVecs())
        
Пример #8
0
 def clusterTermDocMatrix(self, initialNBases=200, algorithm='SVD'):
     """Build an NMFCluster, cluster ``self.termDocMatrix`` and print W's shape.

     Side effects: ``self.nmfCluster`` is replaced by the new clusterer
     and ``self.initialNBases`` is set to ``initialNBases``.

     :param initialNBases: forwarded as the second argument of
         ``NMFCluster.cluster``.
     :param algorithm: assigned to the clusterer's ``algorithm``
         attribute before clustering.
     """
     model = self.nmfCluster = NMFCluster()
     model.algorithm = algorithm
     self.initialNBases = initialNBases
     model.cluster(self.termDocMatrix, self.initialNBases)

     # Only the first two entries of the shape are reported.
     dims = model.W.shape
     print('W: %d, %d' % (dims[0], dims[1]))