Exemplo n.º 1
0
def run(args = sys.argv[1:]):
    """Run the command-line entry point for the given argument list.

    With an empty ``args`` the usage help is printed.  When ``args[0]`` equals
    "train" (compared case-insensitively), ``args[1]`` is used as the path of
    the gold standard training corpus and the training steps run in order:
    ``createLexicon`` in 'full' and 'short' mode, ``getRawText`` into
    ``<corpus>.RAW``, ``initializeEnCorpus`` into ``<corpus>.INIT`` using the
    dictionary read from ``<corpus>.sDict``, then an ``SCRDRTreeLearner`` is
    trained and written to ``<corpus>.RDR``.  On success the ``.INIT``,
    ``.RAW`` and ``.sDict`` files are deleted.  Any other first argument is
    ignored by this function.

    Any exception raised while training (including a missing ``args[1]``) is
    reported on stdout followed by the usage help; it is not re-raised.

    Every ``print`` takes a single pre-formatted string and the handler uses
    ``except ... as``, so the function parses and prints the same text on both
    Python 2.6+ and Python 3.
    """
    if (len(args) == 0):
        printHelp()
    elif args[0].lower() == "train":
        try:
            print("\n====== Start ======")
            # Bound after the banner so that a missing corpus argument is
            # reported by the handler below without a half-printed message.
            corpus = args[1]
            raw_path = corpus + ".RAW"
            init_path = corpus + ".INIT"
            short_dict_path = corpus + ".sDict"
            model_path = corpus + ".RDR"

            print("\nGenerate from the gold standard training corpus an English lexicon %s"
                  % (corpus + ".DICT",))
            createLexicon(corpus, 'full')
            createLexicon(corpus, 'short')

            print("\nExtract from the gold standard training corpus a raw text corpus %s"
                  % (raw_path,))
            getRawText(corpus, raw_path)

            print("\nPerform initially POS tagging on the raw text corpus, to create %s"
                  % (init_path,))
            # presumably written by createLexicon(corpus, 'short') -- confirm
            DICT = readDictionary(short_dict_path)
            initializeEnCorpus(DICT, raw_path, init_path)

            print('\nLearn a tree model of rules for English POS tagging from %s and %s'
                  % (corpus, init_path))
            rdrTree = SCRDRTreeLearner(THRESHOLD[0], THRESHOLD[1])
            rdrTree.learnRDRTree(init_path, corpus)

            # Two spaces before the file name: the original print statement
            # added a separator after the literal's own trailing space.
            print("\nWrite the learned tree model to file  %s" % (model_path,))
            rdrTree.writeToFile(model_path)
            print('\nDone!')

            # Intermediate files are only needed while training.
            os.remove(init_path)
            os.remove(raw_path)
            os.remove(short_dict_path)
        except Exception as e:
            # Two spaces for the same reason as above.
            print("\nERROR ==>  %s" % (e,))
            printHelp()
Exemplo n.º 2
0
def run(args=sys.argv[1:]):
    """Run the command-line entry point for the given argument list.

    An empty ``args`` prints the usage help.  If ``args[0]`` is "train"
    (case-insensitive), ``args[1]`` is the gold standard training corpus and
    the following calls are made in order: ``createLexicon`` ('full' then
    'short'), ``getRawText`` to ``<corpus>.RAW``, ``readDictionary`` on
    ``<corpus>.sDict``, ``initializeEnCorpus`` to ``<corpus>.INIT``, and an
    ``SCRDRTreeLearner`` that learns from the ``.INIT`` file and the corpus
    and is written to ``<corpus>.RDR``.  After a successful run the ``.INIT``,
    ``.RAW`` and ``.sDict`` files are removed.  Other values of ``args[0]``
    do nothing here.

    Exceptions raised during training, including an absent ``args[1]``, are
    printed to stdout together with the usage help and are not propagated.

    The body avoids the Python-2-only print statement and
    ``except Exception, e`` form: each ``print`` receives one formatted
    string, so output is identical on Python 2.6+ and Python 3.
    """
    if (len(args) == 0):
        printHelp()
    elif args[0].lower() == "train":
        try:
            print("\n====== Start ======")
            # Read the corpus path once, after the banner; if it is missing
            # the IndexError goes straight to the handler below.
            goldCorpus = args[1]
            rawFile = goldCorpus + ".RAW"
            initFile = goldCorpus + ".INIT"
            shortDictFile = goldCorpus + ".sDict"
            modelFile = goldCorpus + ".RDR"

            print("\nGenerate from the gold standard training corpus an English lexicon %s"
                  % (goldCorpus + ".DICT",))
            createLexicon(goldCorpus, 'full')
            createLexicon(goldCorpus, 'short')

            print("\nExtract from the gold standard training corpus a raw text corpus %s"
                  % (rawFile,))
            getRawText(goldCorpus, rawFile)

            print("\nPerform initially POS tagging on the raw text corpus, to create %s"
                  % (initFile,))
            # NOTE(review): the .sDict file looks like the output of the
            # 'short' createLexicon call above -- confirm.
            DICT = readDictionary(shortDictFile)
            initializeEnCorpus(DICT, rawFile, initFile)

            print('\nLearn a tree model of rules for English POS tagging from %s and %s'
                  % (goldCorpus, initFile))
            rdrTree = SCRDRTreeLearner(THRESHOLD[0], THRESHOLD[1])
            rdrTree.learnRDRTree(initFile, goldCorpus)

            # The doubled space reproduces the original output, where the
            # print statement inserted a separator after "file ".
            print("\nWrite the learned tree model to file  %s" % (modelFile,))
            rdrTree.writeToFile(modelFile)
            print('\nDone!')

            # Clean up the intermediate files once the model is written.
            os.remove(initFile)
            os.remove(rawFile)
            os.remove(shortDictFile)
        except Exception as e:
            # Doubled space kept for the same reason as above.
            print("\nERROR ==>  %s" % (e,))
            printHelp()
Exemplo n.º 3
0
def run(args=sys.argv[1:]):
    """Dispatch ``args`` to the "train" or "tag" mode of the tagger.

    ``args[0]`` selects the mode and is compared case-insensitively.  An
    empty argument list, or a mode other than "train"/"tag", only prints the
    usage help.

    * train -- ``args[1]`` is the gold standard training corpus.  Calls
      ``createLexicon`` ('full', then 'short'), ``getRawText``,
      ``readDictionary`` on ``<corpus>.sDict``, ``initializeCorpus``, then
      trains an ``SCRDRTreeLearner`` and writes it to ``<corpus>.RDR``.
      The ``.INIT``, ``.RAW`` and ``.sDict`` files are removed afterwards.
    * tag -- ``args[1]`` is the model file, ``args[2]`` the lexicon file and
      ``args[3]`` the corpus handed to ``RDRPOSTagger.tagRawCorpus``.

    Any exception raised while a mode runs is printed, followed by the usage
    help, and is then re-raised to the caller.
    """
    if len(args) == 0:
        printHelp()
        return

    mode = args[0].lower()
    if mode not in ("train", "tag"):
        printHelp()
        return

    try:
        if mode == "train":
            print("\n====== Start ======")
            # Indexed only after the banner, so a missing corpus argument
            # fails at the same point as before.
            corpus = args[1]
            raw_file = corpus + ".RAW"
            init_file = corpus + ".INIT"
            model_file = corpus + ".RDR"

            print("\nGenerate from the gold standard training corpus a lexicon",
                  corpus + ".DICT")
            for lexicon_kind in ('full', 'short'):
                createLexicon(corpus, lexicon_kind)

            print("\nExtract from the gold standard training corpus a raw text corpus",
                  raw_file)
            getRawText(corpus, raw_file)

            print("\nPerform initially POS tagging on the raw text corpus, to generate",
                  init_file)
            lexicon = readDictionary(corpus + ".sDict")
            initializeCorpus(lexicon, raw_file, init_file)

            print('\nLearn a tree model of rules for POS tagging from %s and %s'
                  % (corpus, init_file))
            learner = SCRDRTreeLearner(THRESHOLD[0], THRESHOLD[1])
            learner.learnRDRTree(init_file, corpus)

            print("\nWrite the learned tree model to file ", model_file)
            learner.writeToFile(model_file)
            print('\nDone!')

            # Remove the intermediate files in the original order.
            for suffix in (".INIT", ".RAW", ".sDict"):
                os.remove(corpus + suffix)
        else:
            tagger = RDRPOSTagger()

            # Each positional argument is looked up right before its step, so
            # a missing one surfaces only after the earlier steps have run.
            model_file = args[1]
            print("\n=> Read a POS tagging model from", model_file)
            tagger.constructSCRDRtreeFromRDRfile(model_file)

            lexicon_file = args[2]
            print("\n=> Read a lexicon from", lexicon_file)
            lexicon = readDictionary(lexicon_file)

            target_corpus = args[3]
            print("\n=> Perform POS tagging on", target_corpus)
            tagger.tagRawCorpus(lexicon, target_corpus)
    except Exception as error:
        print("\nERROR ==> ", error)
        printHelp()
        raise error