Пример #1
0
def main():
    """Run perceptron training on the gene training file.

    Starts from an empty weight dictionary (an absent key stands for a
    weight of 0), wraps it in a FeatureData, loads the training set from
    "gene.train" and hands both to perceptron() together with the
    constant 5 (presumably the number of training passes -- confirm
    against perceptron's definition). A timestamped "Start" line is
    printed before the work and a "Done" line after it.
    """
    print("%s %s" % (u.now(), "Start"))

    # File names. The model and output names are not referenced by the
    # active code below; they are kept for the disabled tagging step.
    model_path = "tag.model"
    output_path = "dev.p1.out"
    dev_path = "gene.dev"
    train_path = "gene.train"

    # NOTE(review): the dev file is read here, but nothing below consumes
    # the result -- the tagging loop that used it is disabled.
    dev_lines = u.readFromFile(dev_path)

    # Loading a pre-trained model (p1.readPreTrainedModel) is disabled;
    # training starts from an empty weight vector, i.e. all zeros.
    features = FeatureData({})

    examples = readTrainingSetFile(train_path)
    perceptron(examples, features, 5)

    print("%s %s" % (u.now(), "Done"))
Пример #2
0
def main():
    """Tag every sentence of the input file with a pre-trained model.

    Loads the weight vector from "tag.model", splits "gene.test" into
    sentences with u.GetNextSentence, decodes each sentence with
    ViterbiGLM and saves one "word tag" line per token to
    "gene_test.p1.out", followed by two empty trailing entries. A
    timestamped "Start" line is printed before the work and a "Done"
    line after it.

    A sentence whose tag sequence length differs from its word count is
    left out of the output entirely.
    """
    print("%s %s" % (u.now(), "Start"))

    modelFile = "tag.model"

    # Active data set. Other pairs used with this script:
    #   gene.dev  -> dev.p1.out
    #   short.dev -> dev.short.out
    inputFile = "gene.test"
    outputFile = "gene_test.p1.out"

    weightVector = readPreTrainedModel(modelFile)
    lines = u.readFromFile(inputFile)

    tagSequences = []
    while len(lines) > 0:
        sentence, lines = u.GetNextSentence(lines)

        # FeatureData is rebuilt for each sentence; whether it keeps
        # per-sentence state is not visible here, so it is not hoisted.
        featureData = FeatureData(weightVector)
        tagSequence = ViterbiGLM(featureData, sentence)

        tagSequences.append((tagSequence, sentence))

    # Drop the leading element of the first sentence and of its tags
    # (presumably a placeholder produced by the sentence splitter --
    # confirm against u.GetNextSentence). The guard keeps an input with
    # no sentences from raising IndexError.
    if tagSequences:
        firstTags, firstWords = tagSequences[0]
        del firstTags[0]
        del firstWords[0]

    outputText = []
    for tagSequence, sentence in tagSequences:
        # Checked once per sentence: mismatched sentences are skipped.
        if len(tagSequence) != len(sentence):
            continue
        outputText.extend(
            "%s %s" % (word, tag) for word, tag in zip(sentence, tagSequence)
        )

    # Two empty entries so the joined text ends with a blank line.
    outputText.append("")
    outputText.append("")

    u.saveToFile(outputFile, '\n'.join(outputText))

    print("%s %s" % (u.now(), "Done"))