Example #1
    def writeToFile(self):
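        # Save the contents of the Tkinter Text widget to user_input.txt.
        # END comes from Tkinter; the "-1c" offset drops the trailing newline
        # that the Text widget always appends to its contents.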
        f = open('user_input.txt', 'w')  # 'f' avoids shadowing the built-in 'file'
        f.write(self.text_area.get("1.0", END + "-1c"))
        f.close()

        ##############################################################################

        # initialize variables
        input = "user_input.txt"
        fio = FileIo(input)
        ip = InputProcessor()
        bp = BlockProcessor()
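        # FileIo, InputProcessor and BlockProcessor are project-specific helpers
        # not shown in this snippet; as in Example #2 below, they are presumably
        # imported from the InputProcessor and BlockProcessor modules.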

        # initial setup, process input, tokenize, find parts of speech
        the2DArray = ip.processInput(fio.getFile())
        the2DArray = bp.removeCommas(the2DArray)
        tokenized = ip.tokenize(the2DArray)
        pos = bp.posTagger(the2DArray)
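        # Presumably processInput() splits the raw text into blocks (one row of
        # the2DArray per block), tokenize() turns each block into a word list,
        # and posTagger() attaches a part-of-speech tag (NN, VB, JJ, ...) to
        # every token for the counting steps below.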


        ##############################################################################

        # noun and verb phrase chunking
        chunkPattern = """
                NP: {<DT|PP\$>?<CD>?(<JJ>|<JJR>|<JJS>)*(<NN>|<NNP>|<NNPS>|<NNS>|<POS>)+}
                {<NNP>+}
                {<NN>+}
                {<PRP>+}
                {<DT><JJ>}
            
                VP: {<MD|TO|RB>?<VB.*>+<RB>?<VB.*>?}
                {<VB.*>+}
                """
        phraseChunk = bp.phraseChunker(tokenized, chunkPattern)
        #for tree in phraseChunk:
        #    print tree

        ##############################################################################

        # count nouns per block and total, update the2DArray
        nounDict = bp.countNouns(pos)
        for key, value in nounDict.iteritems():
            if key == 'total':
                totalNouns = value
            else:
                the2DArray = bp.updateArray(the2DArray,key,'nounCount',value)
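        # The same pattern repeats for each part of speech below: every count
        # method presumably returns a dict keyed by block index plus a 'total'
        # entry; per-block counts are written into the2DArray and the totals
        # are kept for the summary row filled in further down.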

        ##############################################################################

        # count verbs per block and total, update the2DArray
        verbDict = bp.countVerbs(pos)
        for key, value in verbDict.iteritems():
            if key == 'total':
                totalVerbs = value
            else:
                the2DArray = bp.updateArray(the2DArray,key,'verbCount',value)

        ##############################################################################

        # count adjectives per block and total, update the2DArray
        adjectiveDict = bp.countAdjectives(pos)
        for key, value in adjectiveDict.iteritems():
            if key == 'total':
                totalAdjectives = value
            else:
                the2DArray = bp.updateArray(the2DArray,key,'adjectiveCount',value)

        ##############################################################################

        # count pronouns per block and total, update the2DArray
        pronounDict = bp.countPronouns(pos)
        for key, value in pronounDict.iteritems():
            if key == 'total':
                totalPronouns = value
            else:
                the2DArray = bp.updateArray(the2DArray,key,'pronounCount',value)

        ##############################################################################

        # count adverbs per block and total, update the2DArray
        adverbDict = bp.countAdverbs(pos)
        for key, value in adverbDict.iteritems():
            if key == 'total':
                totalAdverbs = value
            else:
                the2DArray = bp.updateArray(the2DArray,key,'adverbCount',value)

        ##############################################################################

        # count other parts of speech per block and total, update the2DArray
        otherDict = bp.countOther(pos)
        for key, value in otherDict.iteritems():
            if key == 'total':
                totalOther = value
            else:
                the2DArray = bp.updateArray(the2DArray,key,'otherCount',value)

        ##############################################################################

        # count words per block and total, update the2DArray
        wordCountDict = bp.wordCount(tokenized)
        for key, value in wordCountDict.iteritems():
            if key == 'total':
                totalWordCount = value
            else:
                the2DArray = bp.updateArray(the2DArray,key,'totalWordCount',value)

        ##############################################################################

        # write the document-level totals into the last row of the2DArray
        the2DArray = bp.updateArray(the2DArray,len(the2DArray)-1,'nounCount',totalNouns)
        the2DArray = bp.updateArray(the2DArray,len(the2DArray)-1,'verbCount',totalVerbs)
        the2DArray = bp.updateArray(the2DArray,len(the2DArray)-1,'adjectiveCount',totalAdjectives)
        the2DArray = bp.updateArray(the2DArray,len(the2DArray)-1,'pronounCount',totalPronouns)
        the2DArray = bp.updateArray(the2DArray,len(the2DArray)-1,'adverbCount',totalAdverbs)
        the2DArray = bp.updateArray(the2DArray,len(the2DArray)-1,'otherCount',totalOther)
        the2DArray = bp.updateArray(the2DArray,len(the2DArray)-1,'totalWordCount',totalWordCount)

        ##############################################################################

        # process distinct word count and TF-IDF 
        distinctWordCountArray = bp.distinctWordCount(tokenized)
        tf_idfArray = bp.tf_idf_Count(tokenized)
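        # distinctWordCount() and tf_idf_Count() presumably return per-block
        # arrays of distinct-word counts and TF-IDF scores; both are written
        # out as CSV files below alongside the2DArray.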

        ##############################################################################

        # ask the user for the directory where the output CSV files will be saved
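        # (tkFileDialog is the Python 2 module name; in Python 3 the same
        # dialog lives in tkinter.filedialog)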
        dirname = tkFileDialog.askdirectory(initialdir="/",title="Choose Directory Location for Results")
        outputDirBase = dirname + '/'
        
        # the CSV result files go into a folder named 'teamNLP'; a number is
        # appended to the name if such a folder already exists in the chosen location
        count = 1
        baseName = 'teamNLP'
        outputFileName = outputDirBase + baseName
        while (os.path.exists(outputFileName)): # while the directory name exists
            count += 1 # increment the counter...
            outputFileName = outputDirBase + baseName + str(count) 
        os.mkdir(outputFileName) # create folder in user's chosen directory location
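        # note: because the counter starts at 1 and is incremented before it is
        # appended, the first numbered folder is 'teamNLP2' rather than 'teamNLP1'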
        
        numpy.savetxt(outputFileName + '/the2DArray.csv', the2DArray, delimiter=",", fmt="%s")
        numpy.savetxt(outputFileName + '/distinctWordCountArray.csv', distinctWordCountArray, delimiter=",", fmt="%s")
        numpy.savetxt(outputFileName + '/tf_idfArray.csv', tf_idfArray, delimiter=",", fmt="%s")
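
Neither example shows BlockProcessor itself. Below is a minimal sketch of what a countNouns-style helper might look like, assuming that the tagged input is a list of blocks (one list of (word, tag) pairs per block, e.g. from nltk.pos_tag) and that the result is a dict of per-block counts plus a 'total' entry, which is the shape the loops in Example #1 consume. The names count_nouns and tagged_blocks are illustrative, not the project's.

import nltk

def count_nouns(tagged_blocks):
    """Count Penn Treebank noun tags (NN, NNS, NNP, NNPS) per block."""
    counts = {}
    total = 0
    for index, block in enumerate(tagged_blocks):
        n = sum(1 for word, tag in block if tag.startswith('NN'))
        counts[index] = n
        total += n
    counts['total'] = total
    return counts

# usage -- the tokenizer and tagger models may need to be downloaded first,
# e.g. nltk.download('punkt') and nltk.download('averaged_perceptron_tagger'):
blocks = ["The quick brown fox jumps over the lazy dog.",
          "Tagging assigns a part of speech to every token."]
tagged = [nltk.pos_tag(nltk.word_tokenize(b)) for b in blocks]
print(count_nouns(tagged))  # per-block noun counts plus a 'total' entry
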
Example #2
from InputProcessor import *
from BlockProcessor import *
import nltk
from Switch import *
import sys
import os

if __name__ == "__main__":

    os.system('clear')
    input = "../input.txt"
    #input = "input2"
    fio = FileIo(input)
    ip = InputProcessor()
    bp = BlockProcessor()
    processInput = ip.processInput(fio.getFile())
    tokenized = ip.tokenize(processInput)
    pos = bp.posTagger(processInput)
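    # same pipeline as in Example #1: split the input into blocks, tokenize
    # each block, and POS-tag it with the project's BlockProcessor helper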

    print "Original input text:"
    print "###################################################################################\n\n"
    fio.toString()

    print "\n###################################################################################\n\n"

    if (len(sys.argv) == 2):
        choice = str(sys.argv[1])	
    else:
        choice = raw_input("""
        Please enter the number of the test to run:
        \tq) quit