Python Coder.convertToOneHot примеры использования

Язык программирования: Python

Пространство имен/Пакет: coder

Класс/Тип: Coder

Метод/Функция: convertToOneHot

Примеров на hotexamples.com: 3

Python Coder.convertToOneHot - 3 примера найдено. Это лучшие примеры Python кода для coder.Coder.convertToOneHot, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

Coder(18)

encode(13)

decode(6)

decrypt(4)

crypt(4)

applyPadding(3)

convertToOneHot(3)

run(3)

convertFromOneHot(2)

__init__(2)

removePadding(2)

create_from_codes(1)

code_to_msg(1)

code(1)

getFormat(1)

list_tokens(1)

msg_to_code(1)

Пример #1

Показать файл

Файл: buildModel.py Проект: Nieobliczalny/CodeFixer

class ModelBuilder():
    '''
    def datagen(self, batchSize, xMaxLen, yMaxLen):
        while self.ObjInd < self.ObjMax:
            end = batchSize
            if end + self.ObjInd > self.ObjMax:
                end = self.ObjMax - self.ObjInd
            x = np.zeros((end, xMaxLen, self.totalDictionaryLength))
            y = np.zeros((end, yMaxLen, self.totalDictionaryLength))
            for j in range(self.ObjInd, self.ObjInd + end):
                valueX = self.X[j]
                noZerosToPad = xMaxLen - len(valueX)
                if noZerosToPad > 0:
                    valueX = self.coder.applyPadding(valueX, noZerosToPad)
                valueY = self.Y[j]
                noZerosToPad = yMaxLen - len(valueY)
                if noZerosToPad > 0:
                    valueY = self.coder.applyPadding(valueY, noZerosToPad)
                self.coder.convertToOneHot(valueX, x[j - self.ObjInd])
                self.coder.convertToOneHot(valueY, y[j - self.ObjInd])
            self.ObjInd += end
            #some code here to load and manipulate data into x and y. Mostly numpy functions
            yield x,y
    '''
    def build(self, checker, startK, startBatch):
        # Initialize coder
        print("Initializing coder...")
        self.dictionary = Dictionary(checker)
        self.coder = Coder(self.dictionary)
        self.totalDictionaryLength = self.dictionary.length(
        )  # + globals.firstAvailableToken

        # Load training data from file
        print("Loading training data...")
        data = []
        with open(config.cfTrainFilenameFormat.format(checker), "r") as f:
            data = f.readlines()
        random.shuffle(data)
        dataLen = len(data)
        print("Done, fetched {0} records".format(dataLen))
        if dataLen < 1:
            print("No data found")
            return

        # Json load
        print("Converting to objects...")
        self.X = []
        self.Y = []
        self.ObjInd = 0
        self.ObjMax = dataLen
        xMaxLen = 0
        yMaxLen = 0
        for record in data:
            obj = json.loads(record[:-1])
            self.X.append(obj['x'])
            self.Y.append(obj['y'])
            if len(obj['x']) > xMaxLen:
                xMaxLen = len(obj['x'])
            if len(obj['y']) > yMaxLen:
                yMaxLen = len(obj['y'])

        # Padding
        print("Counted input and output lengths (X = {0}, Y = {1})...".format(
            xMaxLen, yMaxLen))

        # Preparing model
        print("Preparing model...")
        batchSaveIndex = 0
        batchSaveCounter = 0
        batchSaveThreshold = 10000

        if startK == 0 and startBatch == 0:
            model = Sequential()
            model.add(
                LSTM(config.cfTrainHiddenSize,
                     input_shape=(xMaxLen, self.totalDictionaryLength)))
            model.add(RepeatVector(yMaxLen))
            for _ in range(config.cfTrainNumLayers):
                model.add(LSTM(config.cfTrainHiddenSize,
                               return_sequences=True))
            model.add(TimeDistributed(Dense(self.totalDictionaryLength)))
            model.add(Activation('softmax'))
            model.compile(loss='categorical_crossentropy',
                          optimizer='rmsprop',
                          metrics=['accuracy'])
        else:
            modelFormat = 'checkpoint_epoch_{0}.{1}.h5'.format(
                startK - 1, checker)
            if startBatch > 0:
                batchSaveIndex = int(startBatch / batchSaveThreshold)
                modelFormat = 'checkpoint_epoch_b{2}.{0}.{1}.h5'.format(
                    startK, checker, batchSaveIndex - 1)
            model = load_model(modelFormat)
        '''
        print("Converting data...")
        X_s = np.zeros((dataLen, xMaxLen, self.totalDictionaryLength))
        Y_s = np.zeros((dataLen, yMaxLen, self.totalDictionaryLength))
        for j in range(dataLen):
            valueX = X[j]
            noZerosToPad = xMaxLen - len(valueX)
            if noZerosToPad > 0:
                valueX = self.coder.applyPadding(valueX, noZerosToPad)
            valueY = Y[j]
            noZerosToPad = yMaxLen - len(valueY)
            if noZerosToPad > 0:
                valueY = self.coder.applyPadding(valueY, noZerosToPad)
            self.coder.convertToOneHot(valueX, X_s[j])
            self.coder.convertToOneHot(valueY, Y_s[j])
        '''
        # Training model
        '''
        print("Training...")
        for k in range(startK, config.cfTrainNoEpochs):
            #model.fit(X_s, Y_s, epochs=1)#, validation_split=0.2)
            model.fit(self.datagen(config.cfTrainBatchSize, xMaxLen, yMaxLen), epochs=1, steps_per_epoch=103)#, validation_split=0.2)
            model.save('checkpoint_epoch_{0}.{1}.h5'.format(k, checker))
        '''
        #"""
        print("Training model...")
        for k in range(startK, config.cfTrainNoEpochs):
            i = 0
            model.reset_metrics()
            if k == startK:
                i = startBatch
            while i < dataLen:
                end = i + config.cfTrainBatchSize
                if end > dataLen:
                    end = dataLen
                #'''
                X_s = np.zeros((end - i, xMaxLen, self.totalDictionaryLength))
                Y_s = np.zeros((end - i, yMaxLen, self.totalDictionaryLength))
                for j in range(i, end):
                    valueX = self.X[j]
                    noZerosToPad = xMaxLen - len(
                        valueX)  #int((xMaxLen - len(valueX)) / 2)
                    if noZerosToPad > 0:
                        valueX = self.coder.applyPadding(valueX, noZerosToPad)
                    valueY = self.Y[j]
                    noZerosToPad = yMaxLen - len(
                        valueY)  #int((yMaxLen - len(valueY)) / 2)
                    if noZerosToPad > 0:
                        valueY = self.coder.applyPadding(valueY, noZerosToPad)
                    zerosX = np.zeros((xMaxLen, self.totalDictionaryLength))
                    zerosY = np.zeros((yMaxLen, self.totalDictionaryLength))
                    X_s[j - i] = self.coder.convertToOneHot(valueX, zerosX)
                    Y_s[j - i] = self.coder.convertToOneHot(valueY, zerosY)
                result = model.train_on_batch(X_s, Y_s, reset_metrics=False)
                #'''
                #result = model.train_on_batch(X_s[i:end], Y_s[i:end])
                #del X_s
                #del Y_s
                print(
                    "[{2}] Done batch {0}-{1} (loss: {3:.3f}, accuracy: {4:.3f})"
                    .format(i, end, k, result[0], result[1]))
                i += config.cfTrainBatchSize
                batchSaveCounter += config.cfTrainBatchSize
                if batchSaveCounter >= batchSaveThreshold:
                    batchSaveCounter = 0
                    model.save('checkpoint_epoch_b{2}.{0}.{1}.h5'.format(
                        k, checker, batchSaveIndex))
                    batchSaveIndex += 1
            model.save('checkpoint_epoch_{0}.{1}.h5'.format(k, checker))
            batchSaveIndex = 0
            batchSaveCounter = 0
        #"""
        print("All done, exiting...")

Пример #2

Показать файл

class Predictor():
    def __init__(self):
        self.vcs = GitProvider(config.getRepoDir())
        self.ccdb = CCDatabase(config.getCcDbFile())
        self.codeChecker = CodeChecker(config.getRepoDir())
        self.checkers = Checkers()
        self.loadCommitList()

    def loadCommitList(self):
        self.commits = self.vcs.getAllVersions(config.getBranch())
        self.currentCommitIndex = 0

    def convertFilePathToRepoRelativePath(self, path):
        return os.path.relpath(path, config.getRepoDir())

    def getDiffResolvedIds(self):
        resolved = self.codeChecker.diffResolved(config.getCcRunName(),
                                                 config.getTmpDir(), self.ccdb)
        ids = []
        for bug in resolved:
            ids.append(bug['reportId'])
        return ids

    def predict(self, id, checker):
        # Load all bugs
        print("Loading bug data...")
        ids = []
        if id == -1:
            bugs = self.ccdb.getAllBugsForChecker(checker)
            ids = [x[0] for x in bugs]
        else:
            ids.append(id)

        # Loading model
        print("Loading model...")
        model = load_model(config.cfModelFilenameFormat.format(checker))
        model.summary()
        vLabels = ['NOT OK', 'OK', 'Skipped']

        # Initialize coder
        print("Initializing coder...")
        self.dictionary = Dictionary(checker)
        self.coder = Coder(self.dictionary)
        self.totalDictionaryLength = self.dictionary.length()

        # Predicting
        print("Starting predictions...")
        for i in ids:
            allData = self.ccdb.getBugData(i)
            if allData.getChecker(
            ) not in globals.availableCheckers or allData.getChecker(
            ) != checker:
                print("Bug #{0} - checker not supported".format(i))
            else:
                # Load extra tokens from checker message
                checkerInfo = self.checkers.extractTokensForChecker(
                    allData.getChecker(), allData.getMessage())
                # Retrieve code fragment with bug
                fileRelativePath = self.convertFilePathToRepoRelativePath(
                    allData.getFile())
                fullCodeWithBug = self.vcs.getFileContents(
                    fileRelativePath, self.commits[self.currentCommitIndex])
                extractor = CodeExtractor(allData)
                extractor.loadCodeFromText(fullCodeWithBug)
                extractor.extractBugCode()
                bugCodeFragment = extractor.getBugCodeFragment()
                fixCodeFragment = ''
                # Encode it
                encodedBugData, initialUnkList = self.coder.encode(
                    bugCodeFragment, checkerData=checkerInfo)
                # Convert to one-hot
                MODEL_X_MAX_LEN = model.get_layer(index=0).input_shape[1]
                if len(encodedBugData) > MODEL_X_MAX_LEN:
                    print(
                        "Bug #{0} - Code too big for model, ignored".format(i))
                    continue
                elif id == -1:
                    print("Bug #{0} - Good to go".format(i))
                    continue
                noZerosToPad = MODEL_X_MAX_LEN - len(encodedBugData)
                if noZerosToPad > 0:
                    encodedBugData = self.coder.applyPadding(
                        encodedBugData, noZerosToPad)
                X = np.zeros((1, MODEL_X_MAX_LEN, self.totalDictionaryLength))
                X[0] = self.coder.convertToOneHot(
                    encodedBugData,
                    np.zeros((MODEL_X_MAX_LEN, self.totalDictionaryLength)))
                # Predict and convert from one-hot
                Y = self.coder.convertFromOneHot(model.predict(X)[0])
                print(Y)
                # Decode
                Y = self.coder.removePadding(Y)
                fixCodeFragment = self.coder.decode(Y, initialUnkList)

                #Verify?
                vStatus = 2
                if config.cfVerifyPrediction:
                    # Apply fix in source code file
                    extractor.applyFix(fixCodeFragment)
                    extractor.saveToFile(allData.getFile())
                    # Run CodeChecker and analyze code
                    self.codeChecker.check(True)
                    resolvedIds = self.getDiffResolvedIds()
                    # Check if ID is resolved in tmp folder
                    isFixed = i in resolvedIds
                    # Set vStatus accordingly
                    if isFixed:
                        vStatus = 1
                    else:
                        vStatus = 0
                #Print
                print("Bug #{0} - summary".format(i))
                print("== Code fragment with bug ==")
                print(bugCodeFragment)
                print("== Suggested fix ==")
                print(fixCodeFragment)
                print("Verification: {0}".format(vLabels[vStatus]))
                a = ' '
                while a != 'y' and a != 'n':
                    a = input("Apply fix? (y/n): ")
                if a == 'y':
                    if not config.cfVerifyPrediction:
                        # Apply fix in source code file
                        extractor.applyFix(fixCodeFragment)
                        extractor.saveToFile(allData.getFile())
                elif config.cfVerifyPrediction:
                    # Revert file contents
                    self.vcs.checkout(self.commits[self.currentCommitIndex])
                print('Done')
        print("All done, exiting...")

Пример #3

Показать файл

Файл: hook.py Проект: Nieobliczalny/CodeFixer

    def main(self):
        # Do analysis
        shutil.rmtree(config.getTmpDir())
        self.codeChecker.check(True)

        # Diff new
        newBugs = self.getDiffNew()

        if len(newBugs) < 1:
            print('No new bugs introduced, commit is accepted!')
            return
        
        print("New bugs found! Count: {0}. Attempting repairs...".format(len(newBugs)))

        # Load models
        models = {}
        for checker in globals.availableCheckers:
            models[checker] = load_model(config.cfModelFilenameFormat.format(checker))

        # Load all content from files having new
        files = set([self.convertFilePathToRepoRelativePath(x.getFile()) for x in newBugs])
        fileContents = {}
        for f in files:
            fn = config.getRepoDir() + f
            with open(fn, 'r') as fh:
                fileContents[f] = ''.join(fh.readlines())

        # For each file sort by bug line desc
        suggestions = []
        validSuggestions = 0
        for f in files:
            bugs = [x for x in newBugs if self.convertFilePathToRepoRelativePath(x.getFile()) == f]
            bugs.sort(key=lambda x: x.getLine(), reverse=True)
            print("=== File: {0} ===".format(f))
            # For each bug get a suggestion and test it
            for b in bugs:
                print("L{0}, Type: {1}".format(b.getLine(), b.getChecker()))
                # Prepare useful data
                dictionary = Dictionary(b.getChecker())
                coder = Coder(dictionary)
                totalDictionaryLength = dictionary.length()
                # Prepare and extract bug fragment
                checkerInfo = self.checkers.extractTokensForChecker(b.getChecker(), b.getMessage())
                extractor = CodeExtractor(b)
                extractor.loadCodeFromText(fileContents[f])
                extractor.extractBugCode()
                bugCodeFragment = extractor.getBugCodeFragment()
                fixCodeFragment = ''
                # Encode it
                encodedBugData, initialUnkList = coder.encode(bugCodeFragment, checkerData = checkerInfo)
                # Convert to one-hot
                MODEL_X_MAX_LEN = models[b.getChecker()].get_layer(index = 0).input_shape[1]

                if len(encodedBugData) > MODEL_X_MAX_LEN:
                    print("Ignored: Code too big for model")
                    continue

                noZerosToPad = MODEL_X_MAX_LEN - len(encodedBugData)
                if noZerosToPad > 0:
                    encodedBugData = coder.applyPadding(encodedBugData, noZerosToPad)
                X = np.zeros((1, MODEL_X_MAX_LEN, totalDictionaryLength))
                X[0] = coder.convertToOneHot(encodedBugData, np.zeros((MODEL_X_MAX_LEN, totalDictionaryLength)))
                # Predict and convert from one-hot
                Y = coder.convertFromOneHot(models[b.getChecker()].predict(X)[0])
                Y = coder.removePadding(Y)
                # Decode
                fixCodeFragment = coder.decode(Y, initialUnkList)[:-1]
                
                #Verify?
                vStatus = 2
                if config.cfVerifyPrediction:
                    # Apply fix in source code file
                    extractor.applyFix(fixCodeFragment)
                    extractor.saveToFile(b.getFile())
                    # Run CodeChecker and analyze code
                    shutil.rmtree(config.getTmpDir())
                    compilationLog = self.codeChecker.check(True)
                    newBugsAfterFix = self.getDiffNew()
                    # Check if ID is resolved in tmp folder
                    isFixed = 'Build failed' not in compilationLog
                    for nb in newBugsAfterFix:
                        if self.isBugDataEqual(b, nb):
                            isFixed = False
                    # Set vStatus accordingly
                    if isFixed:
                        vStatus = 1
                    else:
                        vStatus = 0
                    # Revert file
                    extractor.loadCodeFromText(fileContents[f])
                    extractor.saveToFile(b.getFile())
                if vStatus == 0:
                    print("Verification: Negative, cannot be applied")
                elif vStatus == 1:
                    print("Verification: Positive, can be applied")
                    validSuggestions += 1
                elif vStatus == 2:
                    print("Verification: Skipped")
                    validSuggestions += 1
                sugg = SuggestionData(f, b, bugCodeFragment, fixCodeFragment, vStatus)
                suggestions.append(sugg)
        print("Valid suggestions prepared for {0} / {1} bugs.".format(validSuggestions, len(newBugs)))

        if validSuggestions > 0:
            print("Apply valid suggestions (a), display them (d), ignore them (i) or abort commit (q)?")
            apply = False
            choice = True
            while choice:
                c = sys.stdin.read(1)
                if c == 'a':
                    apply = True
                    choice = False
                    print("Applying fixes...")
                elif c == 'i':
                    choice = False
                    print("Fixes ignored...")
                elif c == 'd':
                    self.displaySuggestions(suggestions)
                    print("Apply valid suggestions (a), ignore them (i) or abort commit (q)?")
                elif c == 'q':
                    print("Aborting commit...")
                    sys.exit(1)
            if apply:
                self.applyValidFixes(suggestions, files)
                print("Fixes applied!")
        if validSuggestions != len(newBugs):
            print("Unable to fix all bugs, continue with commit (c) or abort (q)?")
            choice = True
            while choice:
                c = sys.stdin.read(1)
                if c == 'c':
                    choice = False
                    print("Continuing...")
                elif c == 'q':
                    print("Aborting commit...")
                    sys.exit(1)
        else:
            print("Bugs corrected, commit is good to go!")