def Main():
    """Entry point: parse CLI flags, train the sentence-structure and
    vocabulary networks, then optionally run unit tests and/or generate
    a word sequence from a seed sentence.

    Recognised flags:
      -utss        run sentence-structure unit tests
      -utv         run vocabulary unit tests
      -td <file>   training data location
      -ts <text>   seed sentence (must be _TrainRangeSS words long)
      -tsc <n>     number of words to generate after the seed
      -of <file>   write generated text to <file> instead of stdout
    """
    _isUnitTestingSS = False
    _isUnitTestingV = False
    _TrainingDataInputFile = "Datasets/Sstt.utf8.txt"
    _TestSentence = ""
    _TestSequenceGenSize = 30
    _OutputFile = None
    consoleInArgs = sys.argv[1:]

    # check input arguments
    valueFlags = ("-td", "-ts", "-tsc", "-of")
    skipNext = False
    for index, val in enumerate(consoleInArgs):
        if skipNext:
            # this token was already consumed as the previous flag's value
            skipNext = False
            continue
        # Runs the unit testing module on initiation
        if val == "-utss":
            _isUnitTestingSS = True
        # Unit testing for the vocabulary network
        elif val == "-utv":
            _isUnitTestingV = True
        elif val in valueFlags:
            # BUGFIX: the original guard len(consoleInArgs) >= index+1 is always
            # true inside enumerate(), so a trailing value-flag crashed with
            # IndexError and the unrecognized-argument branch was dead code.
            if index + 1 >= len(consoleInArgs):
                raise ValueError('Missing value for console argument: ' + str(val))
            skipNext = True
            arg = consoleInArgs[index + 1]
            # specify training data location
            if val == "-td":
                _TrainingDataInputFile = arg
                ConsoleOutput.printGreen("Training data load location changed to: \"" + _TrainingDataInputFile + "\"")
            # give a generation sentence input
            elif val == "-ts":
                _TestSentence = arg
                if len(_TestSentence.split()) != _TrainRangeSS:
                    raise ValueError('Test sequence must be the same length as the vector training size. (' + str(_TrainRangeSS) + ')')
            # set the amount of words generated after input
            elif val == "-tsc":
                _TestSequenceGenSize = int(arg)
                ConsoleOutput.printGreen("Test sequence generation size changed to: " + str(_TestSequenceGenSize))
            # set the output file for the generated data to be printed to
            else:  # "-of"
                _OutputFile = str(arg)
                ConsoleOutput.printGreen("Output generation location changed to: (" + arg + ")")
        else:
            raise ValueError('Un-recognized console argument: ' + str(val))

    # Initialise colorama cross-platform console logging
    init()

    MLNetworkSS = NNSentenceStructure()
    MLNetworkV = NNVocabulary()

    # Network trainer converts text data into normalized vectors that
    # can be passed into the networks
    networkTrainer = NetworkTrainer(_TrainRangeSS, _TrainRangeV)
    networkTrainer.loadTextFromFile(_TrainingDataInputFile)

    # Trainer parses the structure into vector normal arrays of size
    # (_TrainRangeSS); the next word of the sequence is used as the target, e.g.
    # ["Harry", "sat", "on", "his"] -> ["broomstick"] <-- target
    networkTrainer.loadSentenceStructureNormals()
    networkTrainer.loadVocabularyNormals(MLNetworkV)

    # Pass the vectors into the network
    MLNetworkSS.loadVectorsIntoNetwork(networkTrainer._TrainingSequenceSS,
                                       networkTrainer._TrainingTargetsSS)

    # Fit data
    MLNetworkSS.FitNetwork()
    MLNetworkV.FitNetwork()

    # Use console argument "-utss" to activate testing
    uTester = None  # retained for the commented-out UnitTester hookup below
    if _isUnitTestingSS:
        #if(uTester == None):
        #    uTester = UnitTester(MLNetworkSS, MLNetworkV, _TrainRangeSS, _TrainRangeV)
        #uTester.TestSentenceStructuring()
        print("_isUnitTestingSS is true")

    # use console argument "-utv" to activate
    if _isUnitTestingV:
        #if(uTester == None):
        #    uTester = UnitTester(MLNetworkSS, MLNetworkV, _TrainRangeSS, _TrainRangeV)
        #uTester.TestVocabulary()
        print("_isUnitTestingV is true")

    if _TestSentence != "":
        print("_TestSentence is true")
        printToFile = False
        f = None
        # user has specified output location
        if _OutputFile is not None:
            printToFile = True
            f = open(_OutputFile, 'w')
        try:
            genSize = _TestSequenceGenSize  # number of words to generate
            initialInput = _TestSentence
            if printToFile:
                f.write(initialInput + " ")
            else:
                print(initialInput + " ", end="")
            initialInput = initialInput.split()  # split the seed into key words
            # generate a sentence of genSize
            for index in range(0, genSize):
                nlo = NaturalLanguageObject(initialInput)
                # Chinese word segmentation can yield more (word, tag) tuples
                # than the training vector size; drop the surplus leading
                # tuples so the test dimensionality matches the training
                # dimensionality, otherwise the KNN classifier raises an error.
                diff = len(nlo.sentenceNormalised) - _TrainRangeSS
                if diff > 0:
                    nlo.sentenceNormalised = nlo.sentenceNormalised[diff:]
                # since nlo will always be the right size, we can use that variable
                predToke = MLNetworkSS.getPrediction([nlo.sentenceNormalised])
                nextToke = nlo.tokeniseNormals([predToke])
                # now we have the next toke in the sentence, convert that to word
                word = MLNetworkV.getPredictedWord(nlo.sentenceNormalised[-1], nextToke[0])
                # decide whether to print to file or console
                if printToFile:
                    f.write(str(word) + " ")
                else:
                    print(str(word) + " ", end="")
                initialInput.append(word)
                # maintain the sliding input window size
                del initialInput[0]
            print("\n")
        finally:
            # BUGFIX: the output file handle was never closed
            if f is not None:
                f.close()

    # Reset console back to original state
    deinit()
def TestSentenceStructuring(self):
    """Evaluate the sentence-structure network against self._TestingPara.

    Slides a window of self.VectorSizeSS consecutive words over the test
    paragraph, asks the network for the part-of-speech token expected to
    follow each window, and buckets every prediction into passed /
    non-fatal / failed lists, which are printed colour-coded together
    with an overall accuracy figure.
    """
    #testingPara = testingParaHarryPotter
    testingPara = self._TestingPara
    passedTests = []
    nonFatalTests = []
    failedTests = []
    # used to predict accuracy of the network
    acTestPred = []
    acTestTrue = []
    # Build a test sequence form each word
    for index, val in enumerate(testingPara):
        tmpTestSeq = []
        target = None
        # grab the next 3 words after
        # (only while a full window plus a target word still fits)
        if (index < len(testingPara) - (self.VectorSizeSS + 1)):
            for index2 in range(0, self.VectorSizeSS):
                tmpTestSeq.append(testingPara[index + index2])
            target = testingPara[index + self.VectorSizeSS]
            # convert to natural language object
            nloTester = NaturalLanguageObject(tmpTestSeq)
            nloTarget = NaturalLanguageObject([target])
            # get nerual network prediction
            normalPred = self.neuralNetworkSS.getPrediction(
                nloTester.sentenceNormalised)
            prediction = str(nloTester.tokeniseNormals([normalPred]))
            comp = str(nloTarget.sentenceTags)
            cTrue = nloTarget.sentenceNormalised[0]
            # presumably scaled by 100 so the int() cast before
            # accuracy_score below keeps some precision — TODO confirm
            acTestTrue.append(cTrue * 100)
            acTestPred.append(normalPred * 100)
            #if first letters match, this means 'NN' will match with 'NNS'
            # NOTE(review): [2] indexes the *string repr* of the tag list,
            # assumed to be the tag's first character — verify the format.
            if (prediction[2] == comp[2]):
                #filter for probability
                probList = self.neuralNetworkSS.getPredictionProbability(
                    nloTester.sentenceNormalised)
                # keep the highest class probability
                prob = 0
                for val in probList[0]:
                    if (val > prob):
                        prob = val
                passedTests.append(
                    str(nloTester.sentenceTokenList) + " Target: " +
                    str(nloTarget.sentenceTokenList) + " Prediction: " +
                    prediction + " " + str(prob * 100) + "%")
            else:
                probList = self.neuralNetworkSS.getPredictionProbability(
                    nloTester.sentenceNormalised)
                # keep the highest class probability
                prob = 0
                for val in probList[0]:
                    if (val > prob):
                        prob = val
                # if accuracy s less than 30% add to failed list
                if (prob < 0.3):
                    failedTests.append(
                        str(nloTester.sentenceTokenList) + " Target: " +
                        str(nloTarget.sentenceTokenList) + " Prediction: " +
                        prediction + " " + str(prob * 100) + "%")
                else:
                    # if probability is more than 60% its probably passed
                    if (prob > 0.6):
                        passedTests.append(
                            str(nloTester.sentenceTokenList) + " Target: " +
                            str(nloTarget.sentenceTokenList) + " Prediction: " +
                            prediction + " " + str(prob * 100) + "%")
                    else:
                        # mid-probability mismatch: non-fatal
                        nonFatalTests.append(
                            str(nloTester.sentenceTokenList) + " Target: " +
                            str(nloTarget.sentenceTokenList) + " Prediction: " +
                            prediction + " " + str(prob * 100) + "%")
    # print results
    print("\n")
    print("********** TestSentenceStructuring() **********")
    print("\n")
    ConsoleOutput.printUnderline("Failed Tests: (" + str(len(failedTests)) +
                                 "/" + str(len(testingPara)) + ")")
    for val in failedTests:
        ConsoleOutput.printRed(val)
    print("\n")
    ConsoleOutput.printUnderline("Non-Fatal failed Tests: (" +
                                 str(len(nonFatalTests)) + "/" +
                                 str(len(testingPara)) + ")")
    for val in nonFatalTests:
        ConsoleOutput.printYellow(val)
    print("\n")
    ConsoleOutput.printUnderline("Passed Tests: (" + str(len(passedTests)) +
                                 "/" + str(len(testingPara)) + ")")
    for val in passedTests:
        ConsoleOutput.printGreen(val)
    print("\n")
    # accuracy over the scaled-to-int normals collected above
    nnAccuracy = accuracy_score(
        np.array(acTestTrue).astype(int), np.array(acTestPred).astype(int))
    ConsoleOutput.printYellow("Passed: " + str(len(passedTests)) +
                              " Non-Fatals: " + str(len(nonFatalTests)) +
                              " Fails: " + str(len(failedTests)))
    ConsoleOutput.printYellow("NeuralNetork accuracy: " +
                              str(round(nnAccuracy * 100, 1)) + "%")
    print("\n")
def Main():
    """Entry point: parse CLI flags, train the sentence-structure and
    vocabulary networks, then optionally run unit tests and/or generate
    a word sequence from a seed sentence.

    Recognised flags:
      -utss        run sentence-structure unit tests
      -utv         run vocabulary unit tests
      -td <file>   training data location
      -ts <text>   seed sentence (must be _TrainRangeSS words long)
      -tsc <n>     number of words to generate after the seed
    """
    _isUnitTestingSS = False
    _isUnitTestingV = False
    _TrainingDataInputFile = "Datasets/HarryPotter(xxlarge).txt"
    _TestSentence = ""
    _TestSequenceGenSize = 30
    consoleInArgs = sys.argv[1:]

    # check input arguments
    valueFlags = ("-td", "-ts", "-tsc")
    skipNext = False
    for index, val in enumerate(consoleInArgs):
        if skipNext:
            # this token was already consumed as the previous flag's value
            skipNext = False
            continue
        # Runs the unit testing module on initiation
        if val == "-utss":
            _isUnitTestingSS = True
        # Unit testing for the vocabulary network
        elif val == "-utv":
            _isUnitTestingV = True
        elif val in valueFlags:
            # BUGFIX: the original guard len(consoleInArgs) >= index+1 is always
            # true inside enumerate(), so a trailing value-flag crashed with
            # IndexError and the unrecognized-argument branch was dead code.
            if index + 1 >= len(consoleInArgs):
                raise ValueError('Missing value for console argument: ' + str(val))
            skipNext = True
            arg = consoleInArgs[index + 1]
            # specify training data location
            if val == "-td":
                _TrainingDataInputFile = arg
                ConsoleOutput.printGreen("Training data load location changed to: \"" + _TrainingDataInputFile + "\"")
            # give a generation sentence input
            elif val == "-ts":
                _TestSentence = arg
                if len(_TestSentence.split()) != _TrainRangeSS:
                    raise ValueError('Test sequence must be the same length as the vector training size. (' + str(_TrainRangeSS) + ')')
            # set the amount of words generated after input
            else:  # "-tsc"
                _TestSequenceGenSize = int(arg)
        else:
            raise ValueError('Un-recognized console argument: ' + str(val))

    # Initialise colorama cross-platform console logging
    init()

    neuralNetworkSS = NNSentenceStructure()
    neuralNetworkV = NNVocabulary()

    # Network trainer converts text data into normalized vectors that
    # can be passed into the networks
    networkTrainer = NetworkTrainer(_TrainRangeSS, _TrainRangeV)
    networkTrainer.loadTextFromFile(_TrainingDataInputFile)

    # Trainer parses the structure into vector normal arrays of size
    # (_TrainRangeSS); the next word of the sequence is used as the target, e.g.
    # ["Harry", "sat", "on", "his"] -> ["broomstick"] <-- target
    networkTrainer.loadSentenceStructureNormals()
    networkTrainer.loadVocabularyNormals(neuralNetworkV)

    # Pass the vectors into the network
    neuralNetworkSS.loadVectorsIntoNetwork(networkTrainer._TrainingSequenceSS,
                                           networkTrainer._TrainingTargetsSS)

    # Fit data
    neuralNetworkSS.FitNetwork()
    neuralNetworkV.FitNetwork()

    # Use console argument "-utss" to activate testing
    uTester = None
    if _isUnitTestingSS:
        if uTester is None:  # idiom fix: None comparison uses 'is'
            uTester = UnitTester(neuralNetworkSS, neuralNetworkV,
                                 _TrainRangeSS, _TrainRangeV)
        uTester.TestSentenceStructuring()

    # use console argument "-utv" to activate
    if _isUnitTestingV:
        if uTester is None:
            uTester = UnitTester(neuralNetworkSS, neuralNetworkV,
                                 _TrainRangeSS, _TrainRangeV)
        uTester.TestVocabulary()

    if _TestSentence != "":
        genSize = _TestSequenceGenSize
        initialInput = _TestSentence
        print(initialInput + " ", end="")
        initialInput = initialInput.split()
        # generate a sentence of genSize
        for index in range(0, genSize):
            nlo = NaturalLanguageObject(initialInput)
            # since nlo will always be the right size, we can use that variable
            predToke = neuralNetworkSS.getPrediction(nlo.sentenceNormalised)
            nextToke = nlo.tokeniseNormals([predToke])
            # now we have the next toke in the sentence, convert that to word
            word = neuralNetworkV.getPredictedWord(nlo.sentenceNormalised[-1],
                                                   nextToke[0])
            print(str(word) + " ", end="")
            initialInput.append(word)
            # maintain the sliding input window size
            del initialInput[0]
        print("\n")

    # Reset console back to original state
    deinit()
def Main():
    """Entry point: parse CLI flags, train the sentence-structure and
    vocabulary networks, then optionally run unit tests and/or generate
    a word sequence from a seed sentence.

    Recognised flags:
      -utss        run sentence-structure unit tests
      -utv         run vocabulary unit tests
      -td <file>   training data location
      -ts <text>   seed sentence (must be _TrainRangeSS words long)
      -tsc <n>     number of words to generate after the seed
    """
    _isUnitTestingSS = False
    _isUnitTestingV = False
    _TrainingDataInputFile = "Datasets/HarryPotter(xxlarge).txt"
    _TestSentence = ""
    _TestSequenceGenSize = 30
    consoleInArgs = sys.argv[1:]

    # check input arguments
    valueFlags = ("-td", "-ts", "-tsc")
    skipNext = False
    for index, val in enumerate(consoleInArgs):
        if skipNext:
            # this token was already consumed as the previous flag's value
            skipNext = False
            continue
        # Runs the unit testing module on initiation
        if val == "-utss":
            _isUnitTestingSS = True
        # Unit testing for the vocabulary network
        elif val == "-utv":
            _isUnitTestingV = True
        elif val in valueFlags:
            # BUGFIX: the original guard len(consoleInArgs) >= index + 1 is
            # always true inside enumerate(), so a trailing value-flag crashed
            # with IndexError and the unrecognized-argument branch was dead.
            if index + 1 >= len(consoleInArgs):
                raise ValueError('Missing value for console argument: ' + str(val))
            skipNext = True
            arg = consoleInArgs[index + 1]
            # specify training data location
            if val == "-td":
                _TrainingDataInputFile = arg
                ConsoleOutput.printGreen(
                    "Training data load location changed to: \"" +
                    _TrainingDataInputFile + "\"")
            # give a generation sentence input
            elif val == "-ts":
                _TestSentence = arg
                if len(_TestSentence.split()) != _TrainRangeSS:
                    raise ValueError(
                        'Test sequence must be the same length as the vector training size. ('
                        + str(_TrainRangeSS) + ')')
            # set the amount of words generated after input
            else:  # "-tsc"
                _TestSequenceGenSize = int(arg)
        else:
            raise ValueError('Un-recognized console argument: ' + str(val))

    # Initialise colorama cross-platform console logging
    init()

    MLNetworkSS = NNSentenceStructure()
    MLNetworkV = NNVocabulary()

    # Network trainer converts text data into normalized vectors that
    # can be passed into the networks
    networkTrainer = NetworkTrainer(_TrainRangeSS, _TrainRangeV)
    networkTrainer.loadTextFromFile(_TrainingDataInputFile)

    # Trainer parses the structure into vector normal arrays of size
    # (_TrainRangeSS); the next word of the sequence is used as the target, e.g.
    # ["Harry", "sat", "on", "his"] -> ["broomstick"] <-- target
    networkTrainer.loadSentenceStructureNormals()
    networkTrainer.loadVocabularyNormals(MLNetworkV)

    # Pass the vectors into the network
    MLNetworkSS.loadVectorsIntoNetwork(networkTrainer._TrainingSequenceSS,
                                       networkTrainer._TrainingTargetsSS)

    # Fit data
    MLNetworkSS.FitNetwork()
    MLNetworkV.FitNetwork()

    # Use console argument "-utss" to activate testing
    uTester = None
    if _isUnitTestingSS:
        if uTester is None:  # idiom fix: None comparison uses 'is'
            uTester = UnitTester(MLNetworkSS, MLNetworkV,
                                 _TrainRangeSS, _TrainRangeV)
        uTester.TestSentenceStructuring()

    # use console argument "-utv" to activate
    if _isUnitTestingV:
        if uTester is None:
            uTester = UnitTester(MLNetworkSS, MLNetworkV,
                                 _TrainRangeSS, _TrainRangeV)
        uTester.TestVocabulary()

    if _TestSentence != "":
        genSize = _TestSequenceGenSize
        initialInput = _TestSentence
        print(initialInput + " ", end="")
        initialInput = initialInput.split()
        # generate a sentence of genSize
        for index in range(0, genSize):
            nlo = NaturalLanguageObject(initialInput)
            # since nlo will always be the right size, we can use that variable
            predToke = MLNetworkSS.getPrediction(nlo.sentenceNormalised)
            nextToke = nlo.tokeniseNormals([predToke])
            # now we have the next toke in the sentence, convert that to word
            word = MLNetworkV.getPredictedWord(nlo.sentenceNormalised[-1],
                                               nextToke[0])
            print(str(word) + " ", end="")
            initialInput.append(word)
            # maintain the sliding input window size
            del initialInput[0]
        print("\n")

    # Reset console back to original state
    deinit()
def TestSentenceStructuring(self):
    """Evaluate the sentence-structure network on self._TestingPara.

    Slides a window of self.VectorSizeSS consecutive words over the test
    paragraph, asks the network for the part-of-speech token expected to
    follow each window, and buckets each prediction into passed /
    non-fatal / failed lists, which are printed colour-coded along with
    an overall accuracy figure.
    """
    #testingPara = testingParaHarryPotter
    testingPara = self._TestingPara
    passedTests = []
    nonFatalTests = []
    failedTests = []
    # used to predict accuracy of the network
    acTestPred = []
    acTestTrue = []
    # Build a test sequence form each word
    for index, val in enumerate(testingPara):
        tmpTestSeq = []
        target = None
        # grab the next 3 words after
        # (only while a full window plus a target word still fits)
        if(index < len(testingPara)-(self.VectorSizeSS+1)):
            for index2 in range(0, self.VectorSizeSS):
                tmpTestSeq.append(testingPara[index+index2])
            target = testingPara[index+self.VectorSizeSS]
            # convert to natural language object
            nloTester = NaturalLanguageObject(tmpTestSeq)
            nloTarget = NaturalLanguageObject([target])
            # get nerual network prediction
            normalPred = self.neuralNetworkSS.getPrediction(nloTester.sentenceNormalised)
            prediction = str(nloTester.tokeniseNormals([normalPred]))
            comp = str(nloTarget.sentenceTags)
            cTrue = nloTarget.sentenceNormalised[0]
            # presumably scaled by 100 so the int() cast before
            # accuracy_score below keeps some precision — TODO confirm
            acTestTrue.append(cTrue*100)
            acTestPred.append(normalPred*100)
            #if first letters match, this means 'NN' will match with 'NNS'
            # NOTE(review): [2] indexes the *string repr* of the tag list,
            # assumed to be the tag's first character — verify the format.
            if(prediction[2] == comp[2]):
                #filter for probability
                probList = self.neuralNetworkSS.getPredictionProbability(nloTester.sentenceNormalised)
                # keep the highest class probability
                prob = 0
                for val in probList[0]:
                    if(val > prob):
                        prob = val
                passedTests.append(str(nloTester.sentenceTokenList) + " Target: " +
                                   str(nloTarget.sentenceTokenList) + " Prediction: " +
                                   prediction + " " +str(prob*100) + "%")
            else:
                probList = self.neuralNetworkSS.getPredictionProbability(nloTester.sentenceNormalised)
                # keep the highest class probability
                prob = 0
                for val in probList[0]:
                    if(val > prob):
                        prob = val
                # if accuracy s less than 30% add to failed list
                if(prob < 0.3):
                    failedTests.append(str(nloTester.sentenceTokenList) + " Target: " +
                                       str(nloTarget.sentenceTokenList) + " Prediction: " +
                                       prediction + " " +str(prob*100) + "%")
                else:
                    # if probability is more than 60% its probably passed
                    if(prob > 0.6):
                        passedTests.append(str(nloTester.sentenceTokenList) + " Target: " +
                                           str(nloTarget.sentenceTokenList) + " Prediction: " +
                                           prediction + " " +str(prob*100) + "%")
                    else:
                        # mid-probability mismatch: non-fatal
                        nonFatalTests.append(str(nloTester.sentenceTokenList) + " Target: " +
                                             str(nloTarget.sentenceTokenList) + " Prediction: " +
                                             prediction + " " +str(prob*100) + "%")
    # print results
    print("\n")
    print("********** TestSentenceStructuring() **********")
    print("\n")
    ConsoleOutput.printUnderline("Failed Tests: (" + str(len(failedTests)) + "/" + str(len(testingPara)) + ")")
    for val in failedTests:
        ConsoleOutput.printRed(val)
    print("\n")
    ConsoleOutput.printUnderline("Non-Fatal failed Tests: (" + str(len(nonFatalTests)) + "/" + str(len(testingPara)) + ")")
    for val in nonFatalTests:
        ConsoleOutput.printYellow(val)
    print("\n")
    ConsoleOutput.printUnderline("Passed Tests: (" + str(len(passedTests)) + "/" + str(len(testingPara)) + ")")
    for val in passedTests:
        ConsoleOutput.printGreen(val)
    print("\n")
    # accuracy over the scaled-to-int normals collected above
    nnAccuracy = accuracy_score(np.array(acTestTrue).astype(int), np.array(acTestPred).astype(int))
    ConsoleOutput.printYellow("Passed: " + str(len(passedTests)) + " Non-Fatals: " +
                              str(len(nonFatalTests)) + " Fails: " + str(len(failedTests)))
    ConsoleOutput.printYellow("NeuralNetork accuracy: " + str(round(nnAccuracy*100,1)) + "%")
    print("\n")