Exemplo n.º 1
0
def testAlignments(test_data, classifiers):
    """Evaluate the per-template alignment classifiers on ``test_data``.

    For every test problem whose equation template has a trained classifier,
    that classifier predicts a label string whose characters are read as
    indices into the number vector extracted from the word problem.  Each
    prediction is scored twice:

    * the template is solved with the numbers selected by the predicted
      indices and the value of ``x0`` is compared with ``solution[0]``;
    * the predicted label string is compared with the gold label string.

    Every gold/predicted label pair is printed, followed by the tally.

    Args:
        test_data: sequence of ``(i, iIndex, wordproblem, equationTemplate,
            solution)`` tuples.
        classifiers: mapping from ``str(equationTemplate)`` to a fitted
            classifier exposing ``predict``.  Problems whose template has no
            entry (or a ``None`` entry, which the scikit trainer returns when
            it had no training data) are skipped.

    Returns:
        None.  Results are only printed.
    """
    x0 = Symbol('x0')

    # Phase 1: extract features for every problem we can actually classify.
    # Everything needed later is stored together so that predictions stay
    # paired with the problem they belong to even when problems are skipped.
    evaluated = []
    for (i, iIndex, wordproblem, equationTemplate, solution) in test_data:
        try:
            classifier = classifiers[str(equationTemplate)]
        except KeyError:
            continue
        if classifier is None:
            continue
        alignmentFeatures, correctlyAlignedIndices = extractAlignmentFeatures(
            wordproblem, equationTemplate, solution, i)
        goldLabel = ''.join(str(c) for c in correctlyAlignedIndices)
        evaluated.append((classifier, alignmentFeatures, goldLabel,
                          wordproblem, equationTemplate, solution))

    # Phase 2: predict each problem with the classifier trained for ITS
    # template (not with whichever classifier happened to be looked up last).
    predictions = [entry[0].predict([entry[1]])[0] for entry in evaluated]

    # Labels are index digits concatenated without a separator, so each
    # character decodes to one index.
    # NOTE(review): this encoding is only unambiguous for indices 0-9.
    predictedAlignedIndices = [[int(char) for char in label]
                               for label in predictions]

    # NOTE(review): `correct` sums two different measures (solved correctly
    # and label matched), so it can reach twice the number of evaluated
    # problems.  Kept as in the original output; confirm this is intended.
    correct = 0

    # Phase 3: solve the template with the predicted numbers.
    for entry, indices in zip(evaluated, predictedAlignedIndices):
        wordproblem, equationTemplate, solution = entry[3:]
        try:
            numberVector = extractNumberVectorFromQuestion(wordproblem)
            alignedValues = [numberVector[index] for index in indices]
            result = solveEquations(equationTemplate,
                                    getNumberslots(equationTemplate),
                                    alignedValues)
            if solution[0] == result[x0]:
                correct += 1
        except Exception:
            # Best effort: a problem that cannot be solved with the predicted
            # alignment (bad index, unsolvable system, ...) simply does not
            # count as correct.
            continue

    # Phase 4: report and score the raw label predictions.
    for entry, predicted in zip(evaluated, predictions):
        goldLabel = entry[2]
        print('%s %s' % (goldLabel, predicted))
        if goldLabel == predicted:
            correct += 1
    print('Correct: ' + str(correct))
def trainAlignmentClassifierScikit(labeled_word_problems, algorithm):
    """Fit a scikit-learn classifier on alignment features.

    Each problem is passed through ``extractAlignmentFeatures`` in
    ``'train'`` mode.  The returned aligned indices are concatenated into a
    single label string (e.g. indices ``[0, 2, 1]`` become ``'021'``) and the
    classifier is fitted to map feature vectors to these label strings.
    ``iIndex`` of every problem is printed as a progress trace.

    Args:
        labeled_word_problems: iterable of ``(i, iIndex, wordproblem,
            equationTemplate, solution)`` tuples.
        algorithm: ``'SVM'``, ``'NaiveBayes'``, ``'DecisionTree'`` or
            ``'MaxEnt'``.

    Returns:
        The fitted classifier, or ``None`` when no problem yielded both a
        non-empty feature vector and a non-empty alignment.

    Raises:
        ValueError: if there is training data but ``algorithm`` is not one of
            the supported names.
    """
    featuresets = []
    correctlyAlignedIndicesList = []
    for (i, iIndex, wordproblem, equationTemplate, solution) in labeled_word_problems:
        print(iIndex)
        alignmentFeatures, correctlyAlignedIndices = extractAlignmentFeatures(
            wordproblem, equationTemplate, solution, i, 'train')

        # Problems without a gold alignment or without features cannot be
        # used for training.  len() is used rather than truthiness so that
        # array-like return values are handled too.
        if len(correctlyAlignedIndices) == 0 or len(alignmentFeatures) == 0:
            continue

        featuresets.append(alignmentFeatures)
        # NOTE(review): indices are concatenated without a separator, so the
        # label is only unambiguous while every index is a single digit.
        correctlyAlignedIndicesList.append(
            ''.join(str(index) for index in correctlyAlignedIndices))

    # Nothing to learn from: callers receive None instead of a classifier.
    if len(featuresets) == 0:
        return None

    if algorithm == 'SVM':
        classifier = SVC()
    elif algorithm == 'NaiveBayes':
        classifier = GaussianNB()
    elif algorithm == 'DecisionTree':
        classifier = DecisionTreeClassifier()
    elif algorithm == 'MaxEnt':
        classifier = LogisticRegression()
    else:
        # Previously this fell through to ``None.fit(...)``.
        raise ValueError('Unsupported algorithm: %r' % (algorithm,))

    classifier.fit(featuresets, correctlyAlignedIndicesList)
    return classifier
def trainAlignmentClassifier(labeled_word_problems, algorithm):
    """Train an NLTK classifier on alignment features.

    Each problem is passed through ``extractAlignmentFeatures``; problems
    with a non-empty alignment contribute one ``(features, alignment)``
    training pair.  The collected training set is printed before training.

    Args:
        labeled_word_problems: iterable of ``(i, iIndex, wordproblem,
            equationTemplate, solution)`` tuples.
        algorithm: ``'DecisionTree'``, ``'NaiveBayes'``, ``'MaxEntMegam'`` or
            ``'MaxEnt'``.

    Returns:
        The trained NLTK classifier.

    Raises:
        ValueError: if ``algorithm`` is not one of the supported names.
    """
    # Fail fast: an unknown name used to surface only after all features had
    # been extracted, as an UnboundLocalError on the return statement.
    if algorithm not in ('DecisionTree', 'NaiveBayes', 'MaxEntMegam', 'MaxEnt'):
        raise ValueError('Unsupported algorithm: %r' % (algorithm,))

    featuresets = []
    for (i, iIndex, wordproblem, equationTemplate, solution) in labeled_word_problems:
        alignmentFeatures, correctlyAlignedIndices = extractAlignmentFeatures(
            wordproblem, equationTemplate, solution, i)
        # Only problems with a gold alignment can serve as training examples.
        # NOTE(review): the alignment is used as the label as-is; confirm it
        # is a hashable value, which NLTK label counting presumably requires.
        if len(correctlyAlignedIndices) != 0:
            featuresets.append((alignmentFeatures, correctlyAlignedIndices))

    print(featuresets)
    train_set = featuresets

    if algorithm == 'DecisionTree':
        classifier = nltk.DecisionTreeClassifier.train(train_set)
    elif algorithm == 'NaiveBayes':
        classifier = nltk.NaiveBayesClassifier.train(train_set)
    elif algorithm == 'MaxEntMegam':
        classifier = nltk.classify.MaxentClassifier.train(train_set, 'MEGAM', trace=0, max_iter=1)
    else:  # 'MaxEnt' -- the only remaining validated name
        classifier = nltk.MaxentClassifier.train(train_set)

    return classifier