/
AlignmentClassifier.py
79 lines (55 loc) · 2.67 KB
/
AlignmentClassifier.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
import nltk
import numpy
from AlignmentFeatures import extractAlignmentFeatures
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
def trainAlignmentClassifierScikit(labeled_word_problems, algorithm):
    """Fit a scikit-learn classifier on alignment features.

    Args:
        labeled_word_problems: iterable of 5-tuples
            ``(i, iIndex, wordproblem, equationTemplate, solution)``. The
            last three items and ``i`` are forwarded unchanged to
            ``extractAlignmentFeatures``; ``iIndex`` is only printed.
        algorithm: one of ``'SVM'``, ``'NaiveBayes'``, ``'DecisionTree'``
            or ``'MaxEnt'`` (``MaxEnt`` maps to ``LogisticRegression``).

    Returns:
        The fitted classifier, or ``None`` when no problem produced both a
        non-empty feature vector and a non-empty list of aligned indices.

    Raises:
        ValueError: if there is training data but ``algorithm`` is not one
            of the supported names. (With no training data the function
            returns ``None`` regardless of ``algorithm``.)
    """
    featuresets = []
    correctlyAlignedIndicesList = []
    for (i, iIndex, wordproblem, equationTemplate, solution) in labeled_word_problems:
        # Single-argument print(...) produces the same output on Python 2
        # and Python 3.
        print(iIndex)
        alignmentFeatures, correctlyAlignedIndices = extractAlignmentFeatures(
            wordproblem, equationTemplate, solution, i, 'train')
        # Skip problems that give no features or no aligned indices, so the
        # feature list and the label list stay the same length.
        if len(correctlyAlignedIndices) == 0 or len(alignmentFeatures) == 0:
            continue
        featuresets.append(alignmentFeatures)
        # The class label is the aligned indices concatenated into one string.
        # NOTE(review): there is no separator, so e.g. [1, 12] and [11, 2]
        # both become '112' -- confirm indices are always single-digit before
        # relying on these labels being unique.
        correctlyAlignedIndicesList.append(
            ''.join(str(index) for index in correctlyAlignedIndices))

    if len(featuresets) == 0:
        # Nothing to train on.
        return None

    classifierFactories = {
        'SVM': SVC,
        'NaiveBayes': GaussianNB,
        'DecisionTree': DecisionTreeClassifier,
        'MaxEnt': LogisticRegression,
    }
    if algorithm not in classifierFactories:
        # Previously this case fell through with classifier = None and died
        # with an AttributeError on ``.fit``; fail with a clear message.
        raise ValueError('Unsupported algorithm %r; expected one of: %s'
                         % (algorithm, ', '.join(sorted(classifierFactories))))

    classifier = classifierFactories[algorithm]()
    classifier.fit(featuresets, correctlyAlignedIndicesList)
    return classifier
def trainAlignmentClassifier(labeled_word_problems, algorithm):
    """Train an NLTK classifier on alignment features.

    Args:
        labeled_word_problems: iterable of 5-tuples
            ``(i, iIndex, wordproblem, equationTemplate, solution)``. The
            last three items and ``i`` are forwarded unchanged to
            ``extractAlignmentFeatures``; ``iIndex`` is not used here.
        algorithm: one of ``'DecisionTree'``, ``'NaiveBayes'``,
            ``'MaxEntMegam'`` or ``'MaxEnt'``.

    Returns:
        The classifier returned by the selected NLTK ``train`` call.

    Raises:
        ValueError: if ``algorithm`` is not one of the supported names.
    """
    # One trainer per supported algorithm name; each takes the training set.
    # Lambdas keep the per-algorithm arguments next to the name.
    trainers = {
        'DecisionTree': lambda train_set: nltk.DecisionTreeClassifier.train(train_set),
        'NaiveBayes': lambda train_set: nltk.NaiveBayesClassifier.train(train_set),
        'MaxEntMegam': lambda train_set: nltk.classify.MaxentClassifier.train(
            train_set, 'MEGAM', trace=0, max_iter=1),
        'MaxEnt': lambda train_set: nltk.MaxentClassifier.train(train_set),
    }
    # Validate before the feature extraction runs. Previously an unknown name
    # fell through every branch and failed at ``return classifier`` with an
    # UnboundLocalError, after all the extraction work had already been done.
    if algorithm not in trainers:
        raise ValueError('Unsupported algorithm %r; expected one of: %s'
                         % (algorithm, ', '.join(sorted(trainers))))

    featuresets = []
    for (i, iIndex, wordproblem, equationTemplate, solution) in labeled_word_problems:
        # NOTE(review): called without the fifth argument ('train') that
        # trainAlignmentClassifierScikit passes -- presumably that parameter
        # has a default; confirm against AlignmentFeatures.
        alignmentFeatures, correctlyAlignedIndices = extractAlignmentFeatures(
            wordproblem, equationTemplate, solution, i)
        # Only problems with at least one aligned index become training
        # examples; the aligned indices themselves serve as the label.
        if len(correctlyAlignedIndices) != 0:
            featuresets.append((alignmentFeatures, correctlyAlignedIndices))

    # Single-argument print(...) produces the same output on Python 2 and
    # Python 3.
    print(featuresets)
    return trainers[algorithm](featuresets)