def __init__(self, lexicon, C=1, num_features=100):
    """Initialise classifier state and load (or create) the SVM.

    The constructor first tries to restore a previously stored SVM from
    the pickled feature/label files and the stored classifier file under
    ``PATH + "/learning/stored/"``.  If anything in that process raises,
    the error is printed and a fresh, untrained ``svm.SVM`` with a linear
    kernel is created instead.  Finally a Golub-score feature filter is
    built and chained in front of the classifier.

    Args:
        lexicon: Object exposing ``words.keys()``; the number of keys
            becomes ``self.num_features``.
        C: Value stored on ``self.C`` (``train`` passes it to
            ``svm.SVM``).
        num_features: Accepted for interface compatibility only.
            NOTE(review): this argument is never read; the value is
            always replaced by the lexicon size below -- confirm intent.
    """
    self.training_set = None
    self.classes = None
    self.test_set = None
    self.results = None
    self.kernel = ker.Linear()
    self.C = C
    self.feature_data = PATH + "/learning/stored/feature.data"
    self.label_data = PATH + "/learning/stored/svm_label.data"
    self.lexicon = lexicon
    self.num_features = len(self.lexicon.words.keys())

    try:
        print("Loading existing SVM...")
        # Context managers guarantee the pickle files are closed even when
        # unpickling fails (the handles used to be leaked).
        # NOTE(review): the files are opened in the default (text) mode, as
        # before; the mode must match the one used when they were written,
        # so it is deliberately left unchanged here.
        with open(self.feature_data) as feature_file:
            features = pickle.load(feature_file)
        with open(self.label_data) as label_file:
            labels = pickle.load(label_file)
        sparsedata = SparseDataSet(features, L=labels)
        self.svm_classifier = loadSVM(
            PATH + "/learning/stored/svm.classifier", sparsedata)
    except Exception as e:
        # Best-effort restore: any failure (missing file, bad pickle,
        # loader error) falls back to a fresh classifier.
        print(e)
        print("Existing SVM not found!")
        # NOTE(review): unlike train(), this fallback does not pass
        # C=self.C to svm.SVM -- confirm whether that is intended.
        self.svm_classifier = svm.SVM(self.kernel)

    self.accuracy = None
    self.predicted_labels = None

    # Feature selection: filter features by Golub score, then feed the
    # selected features into the classifier through a Chain.
    score = featsel.FeatureScore('golub')
    self.filter = featsel.Filter(score)
    self.feature_selector = FeatureSelect(self.svm_classifier, self.filter)
    self.chain = Chain([self.feature_selector, self.svm_classifier])
def train(self, trainSet_X, trainSet_Y):
    """Train a new SVM on the given samples and labels.

    Both inputs are converted to float numpy arrays and then to plain
    Python lists.  The labels are stringified before being handed to
    ``datafunc.VectorDataSet``.  A new ``svm.SVM`` using ``self.kernel``
    and ``C=self.C`` is stored on ``self.svm`` and trained on that data
    set.

    Args:
        trainSet_X: Training samples; anything ``numpy.array`` can turn
            into a float array.
        trainSet_Y: Training labels; anything ``numpy.array`` can turn
            into a float array.

    Side effects:
        Sets ``self.svm``, ``self.trained``, ``self.uniqueY`` (unique
        label values) and ``self.nClasses`` (how many there are).
    """
    feature_matrix = numpy.array(trainSet_X).astype(float)
    label_vector = numpy.array(trainSet_Y).astype(float)

    label_values = label_vector.tolist()
    sample_rows = feature_matrix.tolist()

    # The data set receives the labels as strings.
    dataset = datafunc.VectorDataSet(
        sample_rows,
        L=[str(value) for value in label_values],
    )

    classifier = svm.SVM(self.kernel, C=self.C)
    self.svm = classifier
    classifier.train(dataset)
    self.trained = True

    distinct_labels = numpy.unique(label_values)
    self.uniqueY = distinct_labels
    self.nClasses = len(distinct_labels)