def penalty_l1(X, y, l):
    """Fit an L1-penalized linear SVM.

    Args:
        X: (n, d) feature matrix.
        y: (n,) label vector.
        l: regularization strength; the estimator's C is 1 / l, so l must
           be non-zero.

    Returns:
        The fitted LinearSVC instance.
    """
    # Pass hyper-parameters through the constructor (scikit-learn validates
    # them there) instead of assigning attributes after construction.
    # dual=False is required by liblinear when penalty='l1'.
    clf = LinearSVC(penalty='l1', dual=False, C=1 / l, max_iter=2500)
    clf.fit(X, y)
    return clf
def train_w(self, hyper_param_c=1.0, normalized_norm=1.0):
    """Fit a bias-free linear SVM on (self.X, self.y), store its weight
    vector rescaled to have norm ``normalized_norm``, and return it.

    Args:
        hyper_param_c: SVM regularization parameter C.
        normalized_norm: target Euclidean norm for the stored weights.

    Returns:
        self.coef, the rescaled weight vector.
    """
    # Train with linear svm (with no bias)
    svm = LinearSVC(random_state=0, fit_intercept=False)
    svm.C = hyper_param_c
    svm.fit(self.X, self.y)

    self.coef = np.ravel(svm.coef_)
    print('w norm: %.4f - [interfaces.py - Line 268]' % norm(self.coef))

    # Rescale in place so that ||self.coef|| == normalized_norm.
    self.coef /= norm(self.coef) / normalized_norm
    return self.coef
def one_vs_rest_svm(train_x, train_y, test_x):
    """
    Trains a linear SVM for binary classification

    Args:
        train_x - (n, d) NumPy array (n datapoints each with d features)
        train_y - (n, ) NumPy array containing the labels (0 or 1) for each training data point
        test_x - (m, d) NumPy array (m datapoints each with d features)
    Returns:
        pred_test_y - (m,) NumPy array containing the labels (0 or 1) for each test data point
    """
    # Pass hyper-parameters through the constructor so scikit-learn can
    # validate them, rather than assigning attributes post-construction.
    model = LinearSVC(random_state=0, C=0.1)
    model.fit(train_x, train_y)
    pred_test_y = model.predict(test_x)
    return pred_test_y
def multi_class_svm(train_x, train_y, test_x):
    """
    Trains a linear SVM for multiclass classification using a one-vs-rest strategy

    Args:
        train_x - (n, d) NumPy array (n datapoints each with d features)
        train_y - (n, ) NumPy array containing the labels (int) for each training data point
        test_x - (m, d) NumPy array (m datapoints each with d features)
    Returns:
        pred_test_y - (m,) NumPy array containing the labels (int) for each test data point
    """
    # Pass hyper-parameters through the constructor so scikit-learn can
    # validate them, rather than assigning attributes post-construction.
    svc = LinearSVC(random_state=0, C=0.1)
    svc.fit(train_x, train_y)
    pred_test_y = svc.predict(test_x)
    return pred_test_y
def findOptSVM(texts, y):
    """Grid-search LinearSVC's regularization strength C on a TF-IDF
    representation of ``texts`` and return the best value.

    Args:
        texts: iterable of raw documents (fed through ``preprocessor``).
        y: labels aligned with ``texts``.

    Returns:
        The C value, from a log-spaced grid over [1e-5, 1e5], with the
        highest accuracy on a held-out 33% split.
    """
    vect = CountVectorizer(preprocessor=preprocessor)
    tfidf = TfidfTransformer()
    vX = vect.fit_transform(texts)
    tfidfX = tfidf.fit_transform(vX)
    X_train, X_test, y_train, y_test = train_test_split(
        tfidfX, y, test_size=0.33, random_state=42)

    cRange = np.logspace(-5, 5, 10)
    accuracyArr = []
    for c in cRange:
        # Fresh estimator per grid point: no state carried over from the
        # previous fit, and C is validated by the constructor.
        svc = LinearSVC(C=c)
        svc.fit(X_train, y_train)
        accuracyArr.append(svc.score(X_test, y_test))

    C_opt = cRange[np.argmax(accuracyArr)]
    return C_opt
def to_super(self):
    """Return a plain LinearSVC carrying this estimator's
    hyper-parameters and fitted state.

    Copies both constructor parameters (C, penalty, ...) and learned
    attributes (coef_, intercept_, classes_, n_iter_) onto a freshly
    constructed instance.
    """
    attrs = (
        'C', 'class_weight', 'classes_', 'coef_', 'dual',
        'fit_intercept', 'intercept_', 'intercept_scaling', 'loss',
        'max_iter', 'multi_class', 'n_iter_', 'penalty',
        'random_state', 'tol', 'verbose',
    )
    superinstance = LinearSVC()
    for name in attrs:
        setattr(superinstance, name, getattr(self, name))
    return superinstance
def findOptSVM(texts, y):
    """Pick the LinearSVC C with the best held-out accuracy on a TF-IDF
    encoding of ``texts``.

    NOTE(review): this appears to duplicate an identical findOptSVM
    defined earlier in this file — the later definition shadows the
    earlier one; confirm which is intended.
    """
    vectorizer = CountVectorizer(preprocessor=preprocessor)
    transformer = TfidfTransformer()
    svc = LinearSVC()

    counts = vectorizer.fit_transform(texts)
    features = transformer.fit_transform(counts)
    X_train, X_test, y_train, y_test = train_test_split(
        features, y, test_size=0.33, random_state=42)

    candidates = np.logspace(-5, 5, 10)
    scores = []
    for c in candidates:
        svc.C = c
        svc.fit(X_train, y_train)
        scores.append(svc.score(X_test, y_test))

    return candidates[np.argmax(scores)]
mndata = MNIST('samples') X, y = mndata.load_training() X = np.array(X) y = np.array(y, dtype=np.int32) test_X, test_y = mndata.load_testing() test_X = np.array(test_X) test_y = np.array(test_y, dtype=np.int32) if testLinear: clf = LinearSVC(loss="hinge", max_iter=2000) Cs = [0.01, 0.1, 1.0, 10.0, 100.0] for C in Cs: clf.C = C clf.fit(X, y) train_scores += [100 * (1 - clf.score(X, y))] test_scores += [100 * (1 - clf.score(test_X, test_y))] print("For C = " + str(C) + ", the training error is " + str(100 * (1 - clf.score(X, y))) + ".") print("For C = " + str(C) + ", the test error is " + str(100 * (1 - clf.score(test_X, test_y))) + ".") else: clf = SVC(kernel="poly", degree=2, C=1.0, gamma="auto") clf.fit(X, y) print("The training error for SVC with a quadratic kernel is " + str(100 * (1 - clf.score(X, y))) + ".") print("The test error for SVC with a quadratic kernel is " +