Пример #1
0
def embedAllFunctions(projectRoot, filterName = 'DefaultFilter', ngramN = None, smallerNgramsToo = None):
    """Embed every function of a project and save the resulting embedding.

    projectRoot      -- project location handed to Embedder.
    filterName       -- 'Symbols' or 'APISymbols' installs the matching
                        filter on the embedder; any other value leaves the
                        embedder's own filter in place.
    ngramN           -- if not None, passed to configureNgramCalculator
                        together with smallerNgramsToo.
    smallerNgramsToo -- only forwarded when ngramN is given.

    Returns whatever Embedder.save() returns. The embedding is saved as
    '<filterName>_<ngramN>.pickl', or '<filterName>.pickl' when ngramN is
    None.
    """
    functionEmbedder = Embedder(projectRoot)
    if ngramN is not None:
        functionEmbedder.configureNgramCalculator(ngramN, smallerNgramsToo)

    # Only these two filters are currently wired up. The filter names
    # 'OnlyConditionsFilter', 'DefaultPlusConditions',
    # 'IdentifiersInConditions', 'WaterOnly' and 'ConditionalExpressions'
    # were disabled in this function and fall through without a setFilter call.
    if filterName == 'Symbols':
        functionEmbedder.setFilter(SymbolFilter())
    elif filterName == 'APISymbols':
        functionEmbedder.setFilter(APIUsageFilter())

    functionEmbedder.embedAllFunctions()

    # '%d' cannot format None, so the default ngramN used to raise a
    # TypeError here, after all of the embedding work had been done.
    if ngramN is None:
        name = '%s.pickl' % filterName
    else:
        name = '%s_%d.pickl' % (filterName, ngramN)
    return functionEmbedder.save(name)
Пример #2
0
 def __init__(self, projectRoot, ngramN, smallerNgramsToo):
     """Store the project root and build the helper objects.

     projectRoot is kept on the instance and passed to Embedder;
     ngramN and smallerNgramsToo are forwarded unchanged to the
     embedder's configureNgramCalculator().
     """
     self.projectRoot = projectRoot
     # Computed right after projectRoot is stored; presumably the helper
     # reads self.projectRoot, so keep this ordering.
     self.nCalls = self._determineTotalNumberOfCalls()

     self.callAreaExtractor = SinkSnippetExtractor()

     ngramEmbedder = Embedder(projectRoot)
     self.embedder = ngramEmbedder
     ngramEmbedder.configureNgramCalculator(ngramN, smallerNgramsToo)

     self.nameDictMapToMatrix = NameDictMapToMatrix()
Пример #3
0
def embedAllFunctions(projectRoot,
                      filterName='DefaultFilter',
                      ngramN=None,
                      smallerNgramsToo=None):
    """Run the function embedder over a project and persist the result.

    Args:
        projectRoot: project location handed to Embedder.
        filterName: 'Symbols' or 'APISymbols' selects the corresponding
            filter; every other value keeps the embedder's own filter.
        ngramN: when not None, forwarded (with smallerNgramsToo) to
            configureNgramCalculator.
        smallerNgramsToo: only used when ngramN is given.

    Returns:
        The return value of Embedder.save(). The name passed to save() is
        '<filterName>_<ngramN>.pickl', or '<filterName>.pickl' if ngramN is
        None.
    """
    functionEmbedder = Embedder(projectRoot)
    if ngramN is not None:
        functionEmbedder.configureNgramCalculator(ngramN, smallerNgramsToo)

    # The filter names 'OnlyConditionsFilter', 'DefaultPlusConditions',
    # 'IdentifiersInConditions', 'WaterOnly' and 'ConditionalExpressions'
    # used to be handled here but were disabled; they install no filter.
    if filterName == 'Symbols':
        functionEmbedder.setFilter(SymbolFilter())
    elif filterName == 'APISymbols':
        functionEmbedder.setFilter(APIUsageFilter())

    functionEmbedder.embedAllFunctions()

    # Formatting None with '%d' raises TypeError, which made the default
    # call fail only after the embedding had already been computed.
    ngramSuffix = '' if ngramN is None else '_%d' % ngramN
    name = '%s%s.pickl' % (filterName, ngramSuffix)
    return functionEmbedder.save(name)
Пример #4
0
class SinkSnippetEmbedder:
    """Embeds the code areas around the calls to a sink.

    For one sink at a time, the subtree around each call is extracted,
    filtered and added to an Embedder; the collected maps are then
    converted into a term-document matrix.

    projectRoot is used as a plain string prefix (file names are appended
    to it directly), so it is expected to end with a path separator.
    """

    def __init__(self, projectRoot, ngramN, smallerNgramsToo):
        """Store the project root and create the helper objects.

        ngramN and smallerNgramsToo are forwarded unchanged to the
        embedder's configureNgramCalculator().
        """
        self.projectRoot = projectRoot
        # Reads self.projectRoot, so it must come after the assignment.
        self.nCalls = self._determineTotalNumberOfCalls()

        self.callAreaExtractor = SinkSnippetExtractor()
        self.embedder = Embedder(projectRoot)
        self.embedder.configureNgramCalculator(ngramN, smallerNgramsToo)
        self.nameDictMapToMatrix = NameDictMapToMatrix()

    def _isSinkCalledTooOften(self, callsToSink):
        """Return True if this sink's share of all calls exceeds the upper bound."""
        # If more than the configured fraction of all calls are calls to
        # this function, this sink is just called too often to be
        # interesting.
        if float(len(callsToSink)) / self.nCalls > UPPER_BOUND_FOR_NUMBER_OF_CALLS_AS_FRACTION:
            print('Sink called too often')
            return True
        return False

    def _isSinkNotCalledOftenEnough(self, callsToSink):
        """Return True if the sink has fewer calls than the lower bound."""
        return (len(callsToSink) < LOWER_BOUND_FOR_NUMBER_OF_CALLS)

    def _determineTotalNumberOfCalls(self):
        """Return the total number of entries in the project's call index.

        Loads '<projectRoot>callIndex.pickl' and sums the lengths of all
        values of the loaded object's `d` mapping.
        """
        # Context manager so the handle is closed; binary mode is what
        # pickle expects.
        with open(self.projectRoot + 'callIndex.pickl', 'rb') as indexFile:
            callIndex = pickle.load(indexFile)
        return numpy.sum([len(v) for v in callIndex.d.values()])

    def embedSinkUsers(self, sink):
        """Embed the call areas of all calls to `sink`.

        sink is indexed as a pair: sink[1] is the collection of call
        labels, and sink[0] is handed back as the first element of the
        result (presumably the sink's name).

        Returns (sink[0], termDocumentMatrix), or (None, None) when the
        sink is called too often or not often enough.
        """
        callsToSink = sink[1]

        if self._isSinkCalledTooOften(callsToSink):
            return (None, None)
        if self._isSinkNotCalledOftenEnough(callsToSink):
            print('Sink not called often enough')
            return (None, None)

        # Bind the bound methods once instead of looking them up per call.
        getSinkAreaSubtree = self.callAreaExtractor.getSinkAreaSubtree
        filterAndAddAST = self.embedder.filterAndAddAST

        for label in callsToSink:
            areaSubtree = getSinkAreaSubtree(self.projectRoot, label)
            filterAndAddAST(label, areaSubtree)

        (vecs, allNgrams) = self.embedder.getMaps()
        self.nameDictMapToMatrix.convertFromDicts(vecs, allNgrams)
        return (sink[0], self.nameDictMapToMatrix.termDocumentMatrix)

    def save(self, name, sinkName):
        """Pickle the current name/dict maps for one sink.

        Writes 'func2SubtreesMap.pickl' and 'allSubtreesDict.pickl' into
        '<projectRoot>embeddings/<name>/sinks/<sinkName>', creating any
        missing directories on the way.
        """
        import os

        thisSinkEmbeddingDir = '/'.join(
            [self.projectRoot + 'embeddings', name, 'sinks', sinkName])

        # One makedirs call creates every missing level of the hierarchy.
        if not os.path.exists(thisSinkEmbeddingDir):
            os.makedirs(thisSinkEmbeddingDir)

        # Context managers make sure both files are flushed and closed.
        with open(thisSinkEmbeddingDir + '/func2SubtreesMap.pickl', 'wb') as mapFile:
            pickle.dump(self.nameDictMapToMatrix.nameDictMap, mapFile)
        with open(thisSinkEmbeddingDir + '/allSubtreesDict.pickl', 'wb') as dictFile:
            pickle.dump(self.nameDictMapToMatrix.allSymbolsDict, dictFile)