def __init__(self, projectRoot, ngramN, smallerNgramsToo):
    """Store the project root and create the helper objects used later.

    projectRoot      -- kept on the instance and handed to Embedder.
    ngramN           -- forwarded unchanged to
                        Embedder.configureNgramCalculator.
    smallerNgramsToo -- forwarded unchanged to
                        Embedder.configureNgramCalculator.
    (NOTE(review): presumably the n-gram length and a flag for also
    including shorter n-grams -- confirm against Embedder.)
    """
    self.projectRoot = projectRoot
    # Computed after projectRoot is stored, as in the original ordering;
    # the helper may rely on it.
    self.nCalls = self._determineTotalNumberOfCalls()

    self.callAreaExtractor = SinkSnippetExtractor()

    # Build and configure the embedder before publishing it on self.
    ngramEmbedder = Embedder(projectRoot)
    ngramEmbedder.configureNgramCalculator(ngramN, smallerNgramsToo)
    self.embedder = ngramEmbedder

    self.nameDictMapToMatrix = NameDictMapToMatrix()
示例#2
0
    def _termDocumentMatrixFromContext(self, context, symbol):
        """Return the term-document matrix derived from context and symbol.

        The term dicts are obtained from
        self._termDictsFromContext(context, symbol).  When that helper
        returns None, None is returned and no conversion takes place.
        Otherwise its result is unpacked as (vecs, allNgrams) and converted
        with a newly created NameDictMapToMatrix.

        Side effect: on the conversion path self.nameDictMapToMatrix is
        replaced by the new converter instance.
        """
        termDicts = self._termDictsFromContext(context, symbol)
        # Identity test rather than ``== None``: equality could be
        # answered by a custom __eq__ on the returned object.
        if termDicts is None:
            return None
        (vecs, allNgrams) = termDicts

        # A fresh converter per call, so the result is built only from
        # the dicts passed in here.
        self.nameDictMapToMatrix = NameDictMapToMatrix()
        self.nameDictMapToMatrix.convertFromDicts(vecs, allNgrams)
        return self.nameDictMapToMatrix.termDocumentMatrix
示例#3
0
def main(projectRoot, tfidf=True):
    """Convert the dictionary files under projectRoot to a matrix and save it.

    Reads 'func2SubtreesMap.pickl' and 'allSubtreesDict.pickl' from
    projectRoot through NameDictMapToMatrix.convertFromFiles, optionally
    calls tfidf() on the resulting termDocumentMatrix, and finally calls
    converter.save(projectRoot).

    projectRoot -- directory prefix of the input files; a missing trailing
                   path separator is added.
    tfidf       -- when true (the default), termDocumentMatrix.tfidf() is
                   called before saving (presumably TF-IDF weighting --
                   confirm against the matrix class).
    """
    import os

    # File names are appended by plain concatenation, so the root has to
    # end with a separator.  Joining with '' appends one only when it is
    # missing and leaves '' (current directory) untouched.
    projectRoot = os.path.join(projectRoot, '')

    nameDictMapFilename = projectRoot + 'func2SubtreesMap.pickl'
    allSymbolsFilename = projectRoot + 'allSubtreesDict.pickl'

    # NOTE: the conversion always runs; no check is made for an already
    # existing matrix file.
    converter = NameDictMapToMatrix()
    converter.convertFromFiles(nameDictMapFilename, allSymbolsFilename)

    if tfidf:
        converter.termDocumentMatrix.tfidf()
    converter.save(projectRoot)
示例#4
0
    def createMatrixForFunctionNames(self, functionNames):
        """Return a term-document matrix limited to functionNames.

        After self._loadFunc2SubtreesMap() has run, each name is looked up
        in self.func2SubtreesMap.d.  Every (ngram, count) entry of a
        function is then replayed count times into a fresh NameToDictMap
        and a fresh OccurrenceCounter, and both are converted with
        NameDictMapToMatrix.convertFromDicts.

        Side effects: self.nameToDictMap and self.allSymbolsDict are
        replaced by the newly filled objects.

        A name missing from self.func2SubtreesMap.d fails at lookup time,
        before any occurrence has been added (KeyError if d is a plain
        dict).
        """
        self._loadFunc2SubtreesMap()

        ngramsByName = self.nameToDictMap = NameToDictMap()
        ngramTotals = self.allSymbolsDict = OccurrenceCounter()
        converter = NameDictMapToMatrix()

        # Resolve all names first so that a bad name aborts before the
        # counters are touched.
        selected = []
        for name in functionNames:
            selected.append((name, self.func2SubtreesMap.d[name]))

        for name, ngramCounts in selected:
            for ngram, count in ngramCounts.iteritems():
                # add() records a single occurrence, hence one call per
                # counted occurrence.
                for _ in xrange(count):
                    ngramsByName.add(ngram, name)
                    ngramTotals.add(ngram)

        converter.convertFromDicts(ngramsByName, ngramTotals)
        return converter.termDocumentMatrix