def nusvc_model(x_train, y_train, x_val, y_val, x_test, testid):
    """Fit a linear NuSVC on standardized features and predict the test set.

    The scaler is fitted on the training split only and re-used for the
    validation and test splits. Previously the standardized arrays were
    computed but the model was fitted and evaluated on the raw features.

    Args:
        x_train, y_train: training features and labels.
        x_val, y_val: validation features and labels, used only to print the
            balanced accuracy.
        x_test: test features to predict.
        testid: identifiers of the test rows, returned unchanged.

    Returns:
        Tuple ``(y_pred, testid)``.
    """
    scaler = StandardScaler()
    x_stand_train = scaler.fit_transform(x_train)
    x_stand_val = scaler.transform(x_val)
    x_stand_test = scaler.transform(x_test)

    clf = NuSVC(nu=0.3,
                kernel="linear",
                probability=True,
                decision_function_shape="ovo",
                gamma="scale",
                class_weight="balanced")
    clf.fit(x_stand_train, y_train)

    # Report the balanced accuracy on the validation split.
    y_pred_val = clf.predict(x_stand_val)
    BMAC = balanced_accuracy_score(y_val, y_pred_val)
    print("BMAC of this model: ", BMAC)
    print("\n")
    print("=" * 30)

    y_pred = clf.predict(x_stand_test)
    return y_pred, testid
class RbfSVM:
    """TF-IDF text classifier backed by an RBF-kernel NuSVC."""

    def __init__(self):
        # NOTE(review): gamma=0.0 is kept from the original; newer
        # scikit-learn releases may reject it -- confirm the target version.
        self.clf = NuSVC(nu=0.7, kernel='rbf', degree=3, gamma=0.0,
                         coef0=0.0, shrinking=True, probability=False,
                         tol=0.001, cache_size=200, verbose=False,
                         max_iter=-1)
        # Tokens are runs of at least three ASCII letters.
        self.pattern = '(?u)\\b[A-Za-z]{3,}'
        self.tfidf = TfidfVectorizer(sublinear_tf=False, use_idf=True,
                                     smooth_idf=True, stop_words='english',
                                     token_pattern=self.pattern,
                                     ngram_range=(1, 3))

    def train(self, fileName):
        """Fit the vectorizer and the classifier from a tab-separated file.

        The file has two columns: integer category, then message text.
        """
        print("RbfSVM Classifier is being trained")
        table = pandas.read_table(fileName, sep="\t", names=["cat", "message"])
        X_train = self.tfidf.fit_transform(table.message)
        Y_train = [int(item) for item in table.cat]
        self.clf.fit(X_train, Y_train)
        print("RbfSVM Classifier has been trained")

    def classify(self, cFileName, rFileName):
        """Predict a label for each line of cFileName; write one per line to rFileName."""
        table = pandas.read_table(cFileName, names=["message"])
        X_test = self.tfidf.transform(table.message)
        print("Data have been classified")
        with open(rFileName, 'w') as f:
            for item in self.clf.predict(X_test).astype(str):
                f.write(item + '\n')

    def validate(self, fileName):
        """Print the fraction of correctly predicted rows of a labelled file.

        The printed figure is labelled "Precision" but is the plain accuracy.
        """
        table = pandas.read_table(fileName, sep="\t", names=["cat", "message"])
        X_validate = self.tfidf.transform(table.message)
        Y_validated = self.clf.predict(X_validate).astype(str)
        totalNum = len(table.cat)
        # Compare positionally so the result does not depend on the index.
        errorCount = sum(
            1 for expected, predicted in zip(table.cat, Y_validated)
            if int(expected) != int(predicted)
        )
        print("Data have been validated! Precision={}".format(
            (totalNum - errorCount) / float(totalNum)))
class svm():
    """NuSVC wrapper that scales every input vector to unit L2 norm."""

    def __init__(self):
        # Alternative that was tried: SVC(kernel='rbf')
        self.clf = NuSVC()

    def train(self, inputs):
        """Fit the classifier.

        Parameters:
            inputs: iterable of Input objects; each provides the feature
                vector ``x`` and its label ``y``.
        """
        X = []
        Y = []
        for data in inputs:
            X.append(data.x / np.linalg.norm(data.x))
            Y.append(data.y)
        self.clf.fit(X, Y)

    def predict(self, input):
        """Predict the label of a single sample.

        Parameters:
            input: an Input object, or a raw feature vector.

        Returns:
            The array returned by the underlying estimator's ``predict``.
        """
        # Decide how to read the vector *before* touching ``input.x``; the
        # previous order raised AttributeError for raw vectors, which made
        # the fallback branch unreachable.
        vector = input.x if isinstance(input, Input) else input
        vector = np.asarray(vector)
        x = vector / np.linalg.norm(vector)
        # The estimator expects a 2-D (n_samples, n_features) array.
        return self.clf.predict(np.atleast_2d(x))
def Nusvc(X, y, xtest):
    """Train a NuSVC (random_state=0, gamma='auto') on X/y and return its predictions for xtest."""
    print("Nu Support-Vector-Machine")
    from sklearn.svm import NuSVC

    model = NuSVC(random_state=0, gamma='auto')
    model.fit(X, y)
    return model.predict(xtest)
def _stack_windows(rows, seq):
    """Combine each run of *seq* consecutive rows with ``+`` without mutating *rows*."""
    stacked = []
    for start in range(len(rows) - seq + 1):
        window = rows[start]
        for offset in range(1, seq):
            window = window + rows[start + offset]
        stacked.append(window)
    return stacked


def fd_svm_time_prior(train, test, ytrain, ytest, seq, k):
    """Train a NuSVC on sliding windows and score it with a sticky-alarm rule.

    Each sample is built by combining ``seq`` consecutive rows with ``+`` and
    is labelled with the label of the window's last row. After prediction,
    once ``k`` consecutive predictions contain no 0, every later prediction
    is forced to 1.

    Compared with the previous version, ``seq == 1`` no longer produces an
    empty data set (``rows[:-seq + 1]`` is ``rows[:0]`` in that case) and the
    caller's ``train`` / ``test`` sequences are no longer modified.

    Args:
        train, test: sequences of feature rows.
        ytrain, ytest: labels aligned with ``train`` / ``test``.
        seq: window length.
        k: run length that triggers the sticky rule.

    Returns:
        Fraction of (post-processed) predictions equal to the true labels.
    """
    train_x = np.array(_stack_windows(train, seq)).astype('float64')
    train_y = np.array(ytrain[seq - 1:]).astype('float64')
    test_x = np.array(_stack_windows(test, seq)).astype('float64')
    test_y = np.array(ytest[seq - 1:]).astype('float64')

    clf = NuSVC()
    clf.fit(train_x, train_y)
    predict_y = list(clf.predict(test_x))

    # Find the first window of k predictions without a 0 and latch to 1
    # for everything after it.
    for i in range(len(predict_y) - k + 1):
        if 0 not in predict_y[i:i + k]:
            for j in range(i + k, len(predict_y)):
                predict_y[j] = 1
            break

    hits = [1 if predicted == actual else 0
            for predicted, actual in zip(predict_y, test_y)]
    return np.average(hits)
def cross_validation(type):
    """Run stratified 8-fold cross-validation for the selected classifier.

    Args:
        type: ``'NuSMV'`` selects NuSVC, ``'LinearSMV'`` selects LinearSVC,
            anything else selects DecisionTreeClassifier.

    Returns:
        Tuple ``(mean weighted F1, mean accuracy)`` over the folds.
    """
    n_splits = 8
    skf = StratifiedKFold(n_splits=n_splits)
    _, df_y, model = tfidf([], [])
    df_x = model  # the third value returned by tfidf() is used as the features

    # Compare by value: ``is`` on string literals depends on interning.
    if type == 'NuSMV':
        clf = NuSVC()
    elif type == 'LinearSMV':
        clf = LinearSVC()
    else:
        clf = DecisionTreeClassifier()

    f1 = 0
    acc = 0
    for train_index, test_index in skf.split(df_x, df_y):
        x_train, x_test = df_x[train_index], df_x[test_index]
        y_train, y_test = df_y[train_index], df_y[test_index]
        clf.fit(x_train, y_train)
        prediction = clf.predict(x_test)
        f1 += f1_score(y_test, prediction, average='weighted')
        acc += accuracy_score(y_test, prediction)
    return f1 / n_splits, acc / n_splits
def predict_loo(transformed_data, args, trn_label, tst_label):
    """Leave-one-subject-out image prediction for the subject ``args.loo``.

    Args:
        transformed_data: array indexed as (ndim, nsample, nsubjs).
        args: object whose ``loo`` attribute is the held-out subject index.
        trn_label: labels for the stacked training samples.
        tst_label: labels for the held-out subject's samples.

    Returns:
        Fraction of the held-out samples classified correctly.

    Raises:
        ValueError: if ``args.loo`` is not a valid subject index.
    """
    # Progress marker without a trailing newline (works on Python 2 and 3).
    sys.stdout.write('imgpred loo %s ' % (args.loo,))
    sys.stdout.flush()

    (ndim, nsample, nsubjs) = transformed_data.shape
    loo = args.loo
    # list(...) is required on Python 3, where range() has no remove().
    loo_idx = list(range(nsubjs))
    loo_idx.remove(loo)

    # tst_data: ndim x nsample
    tst_data = transformed_data[:, :, loo]
    # Training subjects are laid side by side along the sample axis.
    trn_data = np.zeros(shape=(ndim, (nsubjs - 1) * nsample))
    for m, subj in enumerate(loo_idx):
        trn_data[:, m * nsample:(m + 1) * nsample] = transformed_data[:, :, subj]

    clf = NuSVC(nu=0.5, kernel='linear')
    clf.fit(trn_data.T, trn_label)
    pred_label = clf.predict(tst_data.T)

    accu = sum(pred_label == tst_label) / float(len(pred_label))
    return accu
def predict(transformed_data, args, trn_label, tst_label):
    """Image prediction accuracy with each subject held out in turn.

    Args:
        transformed_data: array indexed as (ndim, nsample, nsubjs).
        args: unused; kept for interface compatibility.
        trn_label: labels for the stacked training samples.
        tst_label: labels for one subject's samples.

    Returns:
        Array of length nsubjs with the accuracy obtained for each held-out
        subject.
    """
    # Progress marker without a trailing newline (works on Python 2 and 3).
    sys.stdout.write('imgpred ')
    sys.stdout.flush()

    (ndim, nsample, nsubjs) = transformed_data.shape
    accu = np.zeros(shape=nsubjs)
    trn_data = np.zeros(shape=(ndim, (nsubjs - 1) * nsample))

    for tst_subj in range(nsubjs):
        tst_data = transformed_data[:, :, tst_subj]
        # range() objects have no remove() on Python 3, so filter instead.
        trn_subj = [subj for subj in range(nsubjs) if subj != tst_subj]
        for m, subj in enumerate(trn_subj):
            trn_data[:, m * nsample:(m + 1) * nsample] = transformed_data[:, :, subj]

        clf = NuSVC(nu=0.5, kernel='linear')
        clf.fit(trn_data.T, trn_label)
        pred_label = clf.predict(tst_data.T)
        accu[tst_subj] = sum(pred_label == tst_label) / float(len(pred_label))

    return accu
def predict(transformed_data, args, trn_label, tst_label):
    """Return, per subject, the accuracy obtained when that subject is held out.

    Args:
        transformed_data: array indexed as (ndim, nsample, nsubjs).
        args: unused; kept for interface compatibility.
        trn_label: labels for the stacked training samples.
        tst_label: labels for one subject's samples.

    Returns:
        Array of length nsubjs of accuracies.
    """
    # Progress marker without a trailing newline (works on Python 2 and 3).
    sys.stdout.write('imgpred ')
    sys.stdout.flush()

    (ndim, nsample, nsubjs) = transformed_data.shape
    accu = np.zeros(shape=nsubjs)
    trn_data = np.zeros(shape=(ndim, (nsubjs - 1) * nsample))

    for tst_subj in range(nsubjs):
        tst_data = transformed_data[:, :, tst_subj]
        # Python 3's range() cannot be mutated; build the training list directly.
        trn_subj = [subj for subj in range(nsubjs) if subj != tst_subj]
        for m, subj in enumerate(trn_subj):
            trn_data[:, m * nsample:(m + 1) * nsample] = transformed_data[:, :, subj]

        clf = NuSVC(nu=0.5, kernel='linear')
        clf.fit(trn_data.T, trn_label)
        pred_label = clf.predict(tst_data.T)
        accu[tst_subj] = sum(pred_label == tst_label) / float(len(pred_label))

    return accu
def predict_loo(transformed_data, args, trn_label, tst_label):
    """Train on every subject except ``args.loo`` and test on that subject.

    Args:
        transformed_data: array indexed as (ndim, nsample, nsubjs).
        args: object whose ``loo`` attribute is the held-out subject index.
        trn_label: labels for the stacked training samples.
        tst_label: labels for the held-out subject's samples.

    Returns:
        Fraction of the held-out samples classified correctly.

    Raises:
        ValueError: if ``args.loo`` is not a valid subject index.
    """
    # Progress marker without a trailing newline (works on Python 2 and 3).
    sys.stdout.write('imgpred loo %s ' % (args.loo,))
    sys.stdout.flush()

    (ndim, nsample, nsubjs) = transformed_data.shape
    loo = args.loo
    # list(...) is required on Python 3, where range() has no remove().
    loo_idx = list(range(nsubjs))
    loo_idx.remove(loo)

    # tst_data: ndim x nsample
    tst_data = transformed_data[:, :, loo]
    trn_data = np.zeros(shape=(ndim, (nsubjs - 1) * nsample))
    for m, subj in enumerate(loo_idx):
        trn_data[:, m * nsample:(m + 1) * nsample] = transformed_data[:, :, subj]

    clf = NuSVC(nu=0.5, kernel='linear')
    clf.fit(trn_data.T, trn_label)
    pred_label = clf.predict(tst_data.T)

    accu = sum(pred_label == tst_label) / float(len(pred_label))
    return accu
def nu(newX, y, newDev, devLabel):
    """Fit a NuSVC (gamma='scale') and flag the dev samples it gets wrong.

    Args:
        newX, y: training features and labels.
        newDev, devLabel: development features and labels.

    Returns:
        The result of ``predictions != devLabel``: truthy where the
        prediction differs from the reference label. Previously this value
        was computed and then discarded.
    """
    clNu = NuSVC(gamma='scale')
    clNu.fit(newX, y)
    nuResult = clNu.predict(newDev)
    finalResult = nuResult != devLabel
    return finalResult
class Classifier:
    """Objective-vs-subjective text classifier on TF-IDF weighted counts.

    Call one of ``multinomialNB``, ``linearSVC`` or ``nuSVC`` to train, then
    ``predict`` / ``accurracy`` / ``f1``.
    """

    def __init__(self, objective_data, subjective_data):
        OBJECTIVE = 0
        SUBJECTIVE = 1
        self.objective_data = objective_data
        self.subjective_data = subjective_data

        text = objective_data + subjective_data
        labels = ([OBJECTIVE for _ in objective_data]
                  + [SUBJECTIVE for _ in subjective_data])
        # zip() is an iterator on Python 3, so materialise it before shuffling.
        tuple_list = list(zip(text, labels))
        random.shuffle(tuple_list)
        self.text = [x for x, _ in tuple_list]
        # The labels must follow the same permutation as the texts.
        # Previously the shuffled labels went to ``self.label`` while the
        # classifiers were trained on the unshuffled ``self.labels``.
        self.labels = [y for _, y in tuple_list]
        self.label = list(self.labels)  # kept for existing readers of .label

        # Count vectorizer and TF-IDF transformer shared by all classifiers.
        self.count_vectorizer = CountVectorizer(stop_words="english", min_df=3)
        self.counts = self.count_vectorizer.fit_transform(self.text)
        self.classifier = None
        self.tf_transformer = TfidfTransformer(use_idf=True)
        self.frequencies = self.tf_transformer.fit_transform(self.counts)

    def multinomialNB(self):
        """Train a MultinomialNB (alpha=.001) as the active classifier."""
        self.classifier = MultinomialNB(alpha=.001)
        self.classifier.fit(self.frequencies, self.labels)

    def predict(self, examples):
        """Return the active classifier's predictions for raw text examples."""
        example_counts = self.count_vectorizer.transform(examples)
        example_tf = self.tf_transformer.transform(example_counts)
        predictions = self.classifier.predict(example_tf)
        return predictions

    def linearSVC(self):
        """Train a LinearSVC as the active classifier."""
        self.classifier = LinearSVC()
        self.classifier.fit(self.frequencies, self.labels)

    def nuSVC(self):
        """Train a NuSVC as the active classifier."""
        self.classifier = NuSVC()
        self.classifier.fit(self.frequencies, self.labels)

    def accurracy(self, text, labels):
        """Return the fraction of ``text`` examples whose prediction equals ``labels``."""
        prediction = self.predict(text)
        correct = sum(1 for predicted, expected in zip(prediction, labels)
                      if predicted == expected)
        return correct / float(len(prediction))

    def f1(self, text, actual):
        """Return the F1 score of the predictions for ``text`` against ``actual``."""
        prediction = self.predict(text)
        return f1_score(actual, prediction)
def predict(self, X):
    """Predict with the oneDAL estimator when one is attached, else with stock scikit-learn.

    The backend that is used is logged at INFO level.
    """
    use_onedal = hasattr(self, '_onedal_estimator')
    backend = "onedal" if use_onedal else "sklearn"
    logging.info("sklearn.svm.NuSVC.predict: " + get_patch_message(backend))
    if use_onedal:
        return self._onedal_estimator.predict(X)
    return sklearn_NuSVC.predict(self, X)
def optimize_clf(nf, optimize=1):
    """Pairwise NuSVC accuracy for the current cross-validation fold.

    Reads the module-level ``ds``, ``chunk``, ``val_chunk`` and
    ``pair_list2``. For every pair of targets in ``pair_list2`` a NuSVC is
    trained on the training split and scored on the evaluation split.

    Args:
        nf: number of features; currently unused because the feature
            selection step is disabled.
        optimize: ``>= 1`` evaluates on the validation chunk (``val_chunk``)
            taken from the non-test data; ``0`` evaluates on the test chunk
            (``chunk``).

    Returns:
        ``1 - mean(accuracies)`` when ``optimize == 1``, otherwise the list
        of per-pair accuracies.
    """
    def _select_split():
        # Falls through (returns None) for any other ``optimize`` value, so
        # the unpacking below fails in that case.
        if optimize >= 1:
            outer_train = ds[ds.chunks != chunk]
            validation = outer_train[outer_train.chunks == val_chunk]
            inner_train = outer_train[outer_train.chunks != val_chunk]
            return inner_train, validation
        if optimize == 0:
            return ds[ds.chunks != chunk], ds[ds.chunks == chunk]

    train_ds, not_train_ds = _select_split()

    acc_list = []  # one accuracy per target pair
    for pair in pair_list2:
        first, second = pair[0], pair[1]
        # Keep only the samples that belong to the two targets of this pair.
        train_rows = (train_ds.targets == first) | (train_ds.targets == second)
        eval_rows = (not_train_ds.targets == first) | (not_train_ds.targets == second)
        pair_train = train_ds[train_rows]
        pair_eval = not_train_ds[eval_rows]

        clf = NuSVC(nu=0.5, max_iter=2000)
        clf.fit(pair_train.samples, pair_train.targets)
        hits = clf.predict(pair_eval.samples) == pair_eval.targets
        acc_list.append(sum(hits) / float(len(hits)))

    if optimize == 1:
        return 1 - np.mean(acc_list)
    return acc_list
def svc(x_train, y_train, x_test, y_test):
    """Train a default NuSVC and print its accuracy on the test samples.

    Labels are compared after conversion to ``int``. Nothing is returned.
    """
    clf = NuSVC()
    clf.fit(x_train, y_train)
    predict_list = clf.predict(x_test).tolist()

    cnt_true = sum(
        1 for i in range(len(y_test))
        if int(predict_list[i]) == int(y_test[i])
    )
    # Parenthesised single-argument print works on Python 2 and Python 3.
    print(float(cnt_true) / float(len(y_test)))
class SentimentAnalysis:
    """Sentiment classifier: vectorize -> chi2 feature selection -> NuSVC."""

    # Number of features kept by the chi2 selector (400 was also tried).
    feature_number = 100

    def __init__(self, vec_method="TW", stop_words=()):
        """Constructor.

        :param vec_method: vectorization method: TW, TC, TF, TF-IDF
        :param stop_words: stop words handed to the vectorizer
        """
        self.__vec_method = vec_method
        self.__stop_words = stop_words

    def __get_vectorizer(self):
        """Build the vectorizer for the configured method (KeyError if unknown)."""
        recipes = {
            'TW': (CountVectorizer, {'binary': True}),
            'TC': (CountVectorizer, {}),
            'TF': (TfidfVectorizer, {'use_idf': False}),
            'TF-IDF': (TfidfVectorizer, {'use_idf': True}),
        }
        factory, options = recipes[self.__vec_method]
        return factory(stop_words=self.__stop_words, **options)

    def __vectorize(self, X):
        """Join each token list with spaces, then fit and apply a new vectorizer."""
        documents = [' '.join(words) for words in X]
        self.__vectorizer = self.__get_vectorizer()
        return self.__vectorizer.fit_transform(documents)

    def __feature_selection(self, X, y):
        """Fit the chi2 selector and return the ``feature_number`` best features."""
        self.__feature_selector = SelectKBest(chi2, k=self.feature_number)
        return self.__feature_selector.fit_transform(X, y)

    def train(self, X, y):
        """Fit vectorizer, selector and an RBF NuSVC (nu=0.4) on token lists X and labels y."""
        features = self.__feature_selection(self.__vectorize(X), y)
        self.__clf = NuSVC(nu=0.4, kernel='rbf').fit(features, y)

    def predict(self, X):
        """Return predicted labels for token lists X using the fitted pipeline."""
        documents = [' '.join(words) for words in X]
        vectors = self.__vectorizer.transform(documents)
        selected = self.__feature_selector.transform(vectors)
        return self.__clf.predict(selected)
def svc_nu(X_train, categories, X_test, test_categories):
    """Train a default NuSVC and print its classification report on the test set.

    Args:
        X_train, categories: training features and labels.
        X_test, test_categories: test features and reference labels.
    """
    from sklearn.svm import NuSVC

    svm_nu_classifier = NuSVC().fit(X_train, categories)
    y_svm_predicted = svm_nu_classifier.predict(X_test)
    # Parenthesised single-argument prints work on Python 2 and Python 3.
    print('\n Here is the classification report for support vector machine classiffier:')
    print(metrics.classification_report(test_categories, y_svm_predicted))
def train_test_SVM(self, X_train, y_train, X_test, y_test):
    """Fit a default NuSVC, then print the prediction shape and the confusion matrix."""
    print('Training SVM Classifier')
    model = NuSVC()
    model.fit(X_train, y_train)

    print('Testing SVM Classifier')
    predicted = model.predict(X_test)
    print(predicted.shape)
    print(confusion_matrix(y_test, predicted))
def build_single_svm(self, feature_pos, feature_un, neg_indices, label):
    """Train a positive-vs-negative linear NuSVC, optionally refining the negatives.

    The base classifier is trained on ``feature_pos`` (target +1) and the
    rows of ``feature_un`` selected by ``neg_indices`` (target -1), then
    dumped to the path matching ``label``. When ``self.enable_iteration`` is
    set, a second classifier is refitted up to ``self.iteration`` times; in
    each round the remaining unlabeled rows it rejects are moved into the
    negative set.

    Fixes relative to the previous version:
      * the loop predicted with the base classifier instead of the one
        being refined, so the refinement could never progress;
      * rejected rows were taken from ``feature_un`` using positions that
        are relative to ``feature_left``;
      * recall was logged with ``%d``, truncating the fraction.

    Args:
        feature_pos: feature matrix of the positive samples.
        feature_un: feature matrix of the unlabeled samples.
        neg_indices: row indices of ``feature_un`` used as initial negatives.
        label: ``'tfidf'`` or ``'lsi'``; selects where the model is dumped.

    Returns:
        ``False`` for an unknown label; the refined classifier when its
        recall on the positives exceeds 0.95; otherwise the base classifier.
    """
    if label not in ('tfidf', 'lsi'):
        return False

    n_pos = feature_pos.shape[0]
    neg_lookup = set(neg_indices)  # O(1) membership instead of a list scan
    left_indices = [k for k in range(feature_un.shape[0]) if k not in neg_lookup]
    feature_neg = feature_un[neg_indices]
    feature_left = feature_un[left_indices]

    clf = NuSVC(nu=0.1, kernel='linear', probability=True)
    train_feature = vstack((feature_pos, feature_neg))
    train_target = np.concatenate((np.ones(n_pos), -np.ones(feature_neg.shape[0])))
    clf.fit(train_feature, train_target)

    if label == 'tfidf':
        joblib.dump(clf, self.clf_tfidf_pos_path)
    else:
        joblib.dump(clf, self.clf_lsi_pos_path)
    logger_info.info(
        str(label) + ' score : ' +
        str(self.score(clf.predict(feature_pos), train_target[:n_pos])))

    if self.enable_iteration:
        clf_i = NuSVC(nu=0.1, kernel='linear', probability=True)
        for _ in range(self.iteration):
            train_feature = vstack((feature_pos, feature_neg))
            train_target = np.concatenate(
                (np.ones(n_pos), -np.ones(feature_neg.shape[0])))
            clf_i.fit(train_feature, train_target)
            if feature_left.shape[0] == 0:
                break
            predicts = clf_i.predict(feature_left)
            n_indices = [i for i, value in enumerate(predicts) if value != 1]
            if not n_indices:
                break
            p_indices = [i for i, value in enumerate(predicts) if value == 1]
            # Both index lists are positions within feature_left.
            feature_neg = vstack((feature_neg, feature_left[n_indices]))
            feature_left = feature_left[p_indices]

        recall = self.score(clf_i.predict(feature_pos), np.ones(n_pos))
        logging.info('recall in train sets is : %f', recall)
        if recall > 0.95:
            return clf_i
    return clf
def _test_nu_svc(self, num_classes, backend="torch", extra_config=None):
    """Check that a converted NuSVC predicts the same labels as the original.

    Args:
        num_classes: number of distinct labels in the random target.
        backend: backend name passed to ``hummingbird.ml.convert``.
        extra_config: optional conversion settings; a fresh dict is used
            when omitted so no state is shared between calls.
    """
    if extra_config is None:
        extra_config = {}

    model = NuSVC()
    np.random.seed(0)
    X = np.random.rand(100, 200)
    X = np.array(X, dtype=np.float32)
    y = np.random.randint(num_classes, size=100)
    model.fit(X, y)

    torch_model = hummingbird.ml.convert(model, backend, X, extra_config=extra_config)
    self.assertIsNotNone(torch_model)
    np.testing.assert_allclose(model.predict(X), torch_model.predict(X),
                               rtol=1e-6, atol=1e-6)
def testing():
    """Plot the mean validation score of NuSVC for settings 1..9.

    For each setting ``i`` the classifier built from ``get_kwargs(i)`` is
    trained and scored on 20 fresh ``split_data()`` splits; the mean score
    is plotted against ``i``.
    """
    # A real list, as on Python 2; ``xrange`` no longer exists on Python 3.
    plot_x = list(range(1, 10))
    plot_y = []
    for i in plot_x:
        vals = []
        for _ in range(20):
            train_data, validation_data, train_labels, validation_labels = split_data()
            clf = NuSVC(**get_kwargs(i))
            clf.fit(train_data, train_labels)
            vals.append(check_fit(clf.predict(validation_data), validation_labels))
        plot_y.append(np.mean(vals))
    plot_results(plot_x, plot_y)
def NuSVM(X_train, Y_train, X_test, Y_test):
    """Grid-search an RBF NuSVC and print train and test classification reports.

    The grid search over ``nu`` and ``gamma`` is now actually fitted and used
    for prediction; previously it was constructed but the un-tuned base
    estimator was trained instead. No OneVsRestClassifier wrapper is used.

    Arg:
        X_train : The data for trainset
        Y_train : The label for trainset
        X_test  : The data for testset
        Y_test  : The label for testset
    """
    parameters = {'nu': (0.05, 0.02), 'gamma': [3e-2, 2e-2, 1e-2]}
    svc_clf = NuSVC(nu=0.1, kernel='rbf', verbose=False)
    gs_clf = GridSearchCV(svc_clf, parameters, verbose=False, n_jobs=24)
    gs_clf.fit(X_train, Y_train)

    predicted = gs_clf.predict(X_train)
    print("Train report of NuSVM ======= ")
    print(metrics.classification_report(Y_train, predicted))

    predicted = gs_clf.predict(X_test)
    print("Test report of NuSVM ======= ")
    print(metrics.classification_report(Y_test, predicted))
def nusvc_classifier(dir_models, ticket, x, x_test, y, y_test):
    """Train a NuSVC (nu=0.8), print its report, persist it and return its test score.

    The model file name is ``dir_models + ticket + '_nusvc_' + N + '.pkl'``
    where N is the number of entries currently listed in ``dir_models``.
    """
    print('getting model...NuSVC')
    model = NuSVC(nu=0.8)

    print('training...')
    model.fit(x, y)

    print('predicting...')
    report = classification_report(y_test, model.predict(x_test))
    print(report)

    model_index = len(os.listdir(dir_models))
    target = ''.join([dir_models, ticket, '_nusvc_', str(model_index), '.pkl'])
    joblib.dump(model, target)
    return model.score(x_test, y_test)
def NuSVCMethod(trainData, testData, trainLabel, testLabel):
    """Train and evaluate a default NuSVC.

    Returns a dict with keys 'name', 'accuracy' (test accuracy), 'time'
    (seconds spent, including the import) and 'remark' (empty string).
    """
    startTime = time.time()
    from sklearn.svm import NuSVC

    model = NuSVC()
    model.fit(trainData, trainLabel)
    testAccuracy = accuracy_score(testLabel, model.predict(testData))
    elapsed = time.time() - startTime

    return {
        'name': 'NuSVCMethod',
        'accuracy': testAccuracy,
        'time': elapsed,
        'remark': '',
    }
def test_nusvc():
    """Train a default NuSVC on the module-level data and write CLF.csv.

    Reads ``train_data``, ``train_labels``, ``test_data`` and ``test_id``
    from module scope. The CSV has a ``PassengerId,Survived`` header followed
    by one row per test id.
    """
    import sys

    clf = NuSVC()
    clf = clf.fit(train_data, train_labels)
    output = clf.predict(test_data).astype(int)

    # The csv module needs a binary file on Python 2 and a text file opened
    # with newline="" on Python 3.
    if sys.version_info[0] < 3:
        predictions_file = open("CLF.csv", "wb")
    else:
        predictions_file = open("CLF.csv", "w", newline="")
    # The context manager closes the file even if a write fails.
    with predictions_file:
        open_file_object = csv.writer(predictions_file)
        open_file_object.writerow(["PassengerId", "Survived"])
        open_file_object.writerows(zip(test_id, output))

    print('NuSVC : ')
def nu_support_vector_machines(corpus, documents_training, documents_test, words_features, kernel, nu):
    """
    Train a nu-SVM on the training documents and classify the test documents.

    :param corpus: corpus handle passed to the ``util_classify`` helpers
    :param documents_training: iterable of (id, category, annotations)
    :param documents_test: iterable of (id, category, annotations)
    :param words_features: features used to vectorize a document
    :param kernel: kernel name for NuSVC
    :param nu: nu parameter for NuSVC
    :return: (original_categories, estimated_categories), both lists of
        indices into ``util_classify.get_categories(corpus)``
    """
    print("")
    print("----- nu-Support Vector Machines algorithm ------")
    print("Creating Training Vectors...")
    categories = util_classify.get_categories(corpus)

    # Each training document is vectorized once; the previous version did
    # it twice and discarded the first result.
    X_train_features = []
    y_train_categories = []
    for (_doc_id, original_category, annotations) in documents_training:
        X_train_features.append(
            util_classify.transform_document_in_vector(annotations, words_features, corpus))
        y_train_categories.append(original_category)

    print("Training the algorithm...")
    classifier = NuSVC(nu=nu, kernel=kernel)
    classifier.fit(np.array(X_train_features), np.array(y_train_categories))

    print("Calculating metrics...")
    estimated_categories = []
    original_categories = []
    for (_doc_id, cat_original, annotations) in documents_test:
        vector = np.array(
            util_classify.transform_document_in_vector(annotations, words_features, corpus))
        # predict() expects a 2-D array; take the single label out of the
        # result before looking it up in the category list.
        cat_estimated = classifier.predict(vector.reshape(1, -1))[0]
        estimated_categories.append(categories.index(cat_estimated))
        original_categories.append(categories.index(cat_original))
    return original_categories, estimated_categories
def predict(self):
    """
    Fit a default NuSVC on ``self.X_train`` / ``self.y_train``, predict
    ``self.X_test`` and score the predictions against ``self.y_test`` with
    ``OneHotPredictor.get_accuracy``. The accuracy is stored in ``self.acc``
    and returned.
    """
    model = NuSVC()
    model.fit(self.X_train, self.y_train)
    predicted = list(model.predict(self.X_test))
    self.acc = OneHotPredictor.get_accuracy(predicted, self.y_test)
    return self.acc
def prediction(parent_text, child_text, type):
    """Classify the relation between a parent text and a child text.

    The pair is joined with " STOP ", vectorized together with the training
    corpus by ``tfidf`` (row 0 is the new pair, the remaining rows are the
    training data), and classified.

    Args:
        parent_text: text of the parent argument.
        child_text: text of the child argument.
        type: ``'NuSMV'`` selects NuSVC, ``'LinearSMV'`` selects LinearSVC,
            anything else selects DecisionTreeClassifier.

    Returns:
        ``"Attack"`` when the predicted label equals 0, else ``"Support"``.
    """
    pair = parent_text + " STOP " + child_text
    _, df_y, model = tfidf([pair], [])

    # Compare by value: ``is`` on literals depends on interning / caching.
    if type == 'NuSMV':
        clf = NuSVC()
    elif type == 'LinearSMV':
        clf = LinearSVC()
    else:
        clf = DecisionTreeClassifier()

    clf.fit(model[1:], df_y)
    predicted = clf.predict(model[0]).tolist()
    return "Attack" if predicted[0] == 0 else "Support"
def text_spam(text):
    """Classify ``text`` by majority vote of five models trained on hindi_spam.xlsx.

    Every training message is cleaned character by character with
    ``purify``, split on whitespace, filtered with ``rem_stopwords``, stemmed
    with ``hi_stem`` and re-joined with spaces before TF-IDF vectorization.
    The models are retrained on every call.

    The cleaning no longer writes into the DataFrame through chained
    indexing (``frame.text[i] = ...``), which pandas may silently apply to a
    temporary copy.

    Returns:
        The label predicted by most models; on a tie, the tied label that
        appears first in model order (NB, SVC, NuSVC, RF, GB).
    """
    raw_data = pd.read_excel("hindi_spam.xlsx")

    cleaned = []
    for message in raw_data['text']:
        tokens = ''.join(map(purify, message)).split()
        tokens = hi_stem(rem_stopwords(tokens))
        cleaned.append(' '.join(tokens))
    labels = raw_data['type']

    transformer2 = TfidfVectorizer(ngram_range=(1, 1))
    counts2 = transformer2.fit_transform(cleaned)

    models = [
        BernoulliNB(),
        SVC(kernel='linear'),
        NuSVC(kernel='linear'),
        RandomForestClassifier(n_estimators=50, min_samples_split=3),
        GradientBoostingClassifier(n_estimators=50, min_samples_split=200),
    ]
    for model in models:
        model.fit(counts2, labels)

    counts1 = transformer2.transform([text])
    # Vote on scalar labels rather than on 1-element arrays.
    votes = [model.predict(counts1)[0] for model in models]
    return max(votes, key=votes.count)
class NuSVCImpl:
    """Adapter that forwards to an ``Op`` instance built from the hyperparameters."""

    def __init__(self, **hyperparams):
        self._hyperparams = hyperparams
        self._wrapped_model = Op(**self._hyperparams)

    def fit(self, X, y=None):
        """Fit the wrapped model (``y`` is forwarded only when given) and return self."""
        fit_args = (X,) if y is None else (X, y)
        self._wrapped_model.fit(*fit_args)
        return self

    def predict(self, X):
        """Return the wrapped model's ``predict(X)``."""
        return self._wrapped_model.predict(X)

    def predict_proba(self, X):
        """Return the wrapped model's ``predict_proba(X)``."""
        return self._wrapped_model.predict_proba(X)

    def decision_function(self, X):
        """Return the wrapped model's ``decision_function(X)``."""
        return self._wrapped_model.decision_function(X)
def classifier_prediction(docs, labels, documents):
    """Train seven classifiers and print each one's predictions for ``documents``.

    ``documents`` are vectorized with the module-level ``vectorizer``.
    Results are printed as ``"<name> :- <predictions>"`` separated by blank
    lines.

    Args:
        docs: vectorized training documents.
        labels: training labels.
        documents: raw documents to classify.
    """
    named_estimators = [
        ("Multinomial Naive Bayes", MultinomialNB()),
        ("Bernoulli Naive Bayes", BernoulliNB()),
        ("Logistic Regression", LogisticRegression()),
        ("SGDC", SGDClassifier()),
        ("SVC", SVC()),
        ("Linear SVC", LinearSVC()),
        ("Nu SVC", NuSVC()),
    ]
    # Train everything first, then predict, then report (original order).
    fitted = [(title, estimator.fit(docs, labels))
              for title, estimator in named_estimators]

    egs_count = vectorizer.transform(documents)
    predictions = [(title, model.predict(egs_count)) for title, model in fitted]

    for position, (title, predicted) in enumerate(predictions):
        if position:
            # A bare ``print`` is a no-op on Python 3; print("") emits the
            # blank separator line on both Python 2 and 3.
            print("")
        print(title + " :- " + str(predicted))
def main():
    """Train a default NuSVC on 'trainingSet' and report error rates on 'testSet'.

    Prints the overall error rate, the per-class sample counts and error
    rates (label 0 is reported as negative, every other label as positive)
    and the elapsed time. A class with no test samples is reported with an
    error rate of ``nan`` instead of raising ZeroDivisionError.
    """
    start = time.time()
    trainX, trainY = loadImgFeature('trainingSet')
    testX, testY = loadImgFeature('testSet')

    clf = NuSVC()
    clf.fit(trainX, trainY)
    Z = clf.predict(testX)
    print("the total error rate is" + str(sum(Z != testY) / float(len(testY))))

    error0, error1, total0, total1 = 0, 0, 0, 0
    for actual, predicted in zip(testY, Z):
        if actual == 0:
            total0 += 1
            if predicted != 0:
                error0 += 1
        else:
            total1 += 1
            if predicted != 1:
                error1 += 1

    rate1 = error1 / float(total1) if total1 else float('nan')
    rate0 = error0 / float(total0) if total0 else float('nan')
    print("\nthe total number of positive sample is %d,the positive sample error rate is %f." % (
        total1, rate1))
    print("\nthe total number of negative sample is %d,the negative sample error rate is %f." % (
        total0, rate0))
    print("spend time:%ss." % (time.time() - start))
# ``raw_input`` only exists on Python 2; fall back to ``input`` on Python 3.
try:
    _prompt = raw_input
except NameError:
    _prompt = input

# Make a model with the best parameters found by the search object ``clf``.
estimator = NuSVC(kernel='rbf',
                  gamma=clf.best_estimator_.gamma,
                  nu=clf.best_estimator_.nu)

# Plot the learning curve to find a good split.
title = 'NuSVC'
plot_learning_curve(estimator, title, X_train, y_train, cv=cv, n_jobs=4)
p.savefig("supervised_learning_nusvc.pdf")

# With a good number of test samples found, fit on the training data and
# predict the whole set.
estimator.fit(X_train, y_train)
y_pred = estimator.predict(X_all)
DataFrame(y_pred).to_csv("supervised_prediction_labels_nusvc.csv")

print(classification_report(y_all, y_pred))
print("Best params are:" + str(clf.best_params_))

# Hold here until the user confirms.
_prompt("Continue??")

# Now take the model found, and find the outliers.
outlier_percent = 0.01
# TODO: decide how to use outlier_percent.
# Use dim reduction to look at the space.
# Keep only complete 11-column rows without missing ("?") values:
# columns 1..9 are the features, column 10 is the label.
for row in spamreader:
    if len(row) == 11 and "?" not in row:
        x.append(row[1:10])
        y.append(int(row[10]))

# Convert every feature row from strings to integers.
for i in x:
    z.append([int(j) for j in i])

########################################################################
# NuSVM classifier
from sklearn.svm import NuSVC
clf = NuSVC()
# NOTE(review): rows 0 and 200 are in neither the training nor the
# validation slice -- confirm that this is intended.
clf.fit(z[1:200], y[1:200])
valid = clf.predict(z[201:698])
for i in valid:
    if i != y[count + 201]:
        mis += 1
    count += 1
print("NuSVM misclassification rate is")
# Divide by the number of rows actually predicted; the previous hard-coded
# 498 did not match the size of the validation slice.
print(float(mis) / len(valid) * 100)

#########################################################################
# Random Forest
from sklearn.ensemble import RandomForestClassifier
mis = 0
count = 0
clf1 = RandomForestClassifier(n_estimators=10)
clf1.fit(z[1:200], y[1:200])
def nuSupportVectorClassification(dataFile, outputFolder, NU, kernelType, numDegree, numGamma, coef, isShrink, isProb, tolerance, cacheSize, classWeight, isVerbose, maxIter, decisionShape, randomState, parameters):
    """Train and apply a NuSVC for every train/test split listed in ``dataFile``.

    ``dataFile`` is a YAML document with the keys ``training`` and
    ``testing`` (parallel lists of CSV paths), ``inputFile``, ``label`` (name
    of the target column) and ``split_params``; optional feature-selection,
    feature-extraction and preprocessing entries are copied to the output
    when present.

    For split ``i`` the test labels and the predictions are written to
    ``<outputFolder>/result<i+1>.csv``; a summary is written to
    ``<outputFolder>/results.yaml``.

    Args:
        dataFile: path of the YAML description of the splits.
        outputFolder: directory for the result files; created if missing.
        NU ... randomState: forwarded to the matching NuSVC arguments.
        parameters: mapping recorded as ``algo_params``; when empty or
            ``None``, ``{'parameter': 'default'}`` is recorded instead (the
            caller's object is no longer modified).
    """
    # NOTE(review): yaml.load can construct arbitrary Python objects and
    # recent PyYAML releases require an explicit Loader argument. If the data
    # file is not fully trusted, switch to yaml.safe_load -- confirm first
    # that the split descriptions contain only plain YAML types.
    with open(dataFile) as stream:
        inputData = yaml.load(stream)
    trainingSet = inputData['training']
    testingSet = inputData['testing']
    inputFile = inputData['inputFile']
    label = inputData['label']

    if not os.path.exists(outputFolder):
        try:
            os.makedirs(outputFolder)
        except OSError as exc:
            # Another process may have created the folder in the meantime.
            if exc.errno != errno.EEXIST:
                raise

    resultSet = []
    for i, trainingPath in enumerate(trainingSet):
        train_df = pd.read_csv(trainingPath)
        train_labels = train_df[label]
        train_features = train_df.drop(label, axis=1)

        test_df = pd.read_csv(testingSet[i])
        test_predictions = pd.DataFrame(test_df[label])
        test_features = test_df.drop(label, axis=1)

        svm = NuSVC(nu=NU, kernel=kernelType, degree=numDegree,
                    gamma=numGamma, coef0=coef, shrinking=isShrink,
                    probability=isProb, tol=tolerance, cache_size=cacheSize,
                    class_weight=classWeight, verbose=isVerbose,
                    max_iter=maxIter, decision_function_shape=decisionShape,
                    random_state=randomState)
        svm.fit(train_features, train_labels)
        test_predictions['predictions'] = svm.predict(test_features)

        resultFile = outputFolder + '/result' + str(i + 1) + '.csv'
        test_predictions.to_csv(resultFile, index=False)
        resultSet.append(resultFile)

    resultDict = dict()
    resultDict['results'] = resultSet
    resultDict['label'] = label
    resultDict['algo_params'] = parameters if parameters else {'parameter': 'default'}
    resultDict['split_params'] = inputData['split_params']
    if 'feature_selection_parameters' in inputData:
        resultDict['feature_selection_parameters'] = inputData['feature_selection_parameters']
        resultDict['feature_selection_algorithm'] = inputData['feature_selection_algorithm']
    if 'feature_extraction_parameters' in inputData:
        resultDict['feature_extraction_parameters'] = inputData['feature_extraction_parameters']
        resultDict['feature_extraction_algorithm'] = inputData['feature_extraction_algorithm']
    if 'preprocessing_params' in inputData:
        resultDict['preprocessing_params'] = inputData['preprocessing_params']
    resultDict['inputFile'] = inputFile
    resultDict['algorithm'] = "NuSupportVectorClassification"

    with open(outputFolder + '/results.yaml', 'w') as out:
        yaml.dump(resultDict, out)
# Reference labels for the test set: three consecutive blocks of 100
# samples labelled 1, 2 and 3.
Ytest = np.ones((testshape[0]))
Ytest_predict = np.ones((testshape[0]))
Ytest[0:100] = 1
Ytest[100:200] = 2
Ytest[200:300] = 3

clf = NuSVC()
clf.fit(Xtrain, Ytrain)

# Predict each split in one call. The previous per-sample loops stored a
# 1-element array into a scalar slot, which NumPy has deprecated.
Ytrain_predict[:trainshape[0]] = clf.predict(Xtrain[:trainshape[0]])
Yval_predict[:valshape[0]] = clf.predict(Xval[:valshape[0]])
Ytest_predict[:testshape[0]] = clf.predict(Xtest[:testshape[0]])

Y = np.concatenate((Ytrain, Yval, Ytest))
Y_predict = np.concatenate((Ytrain_predict, Yval_predict, Ytest_predict))

# Confusion matrices per split and for all samples together.
cm_train = confusion_matrix(Ytrain, Ytrain_predict)
cm_val = confusion_matrix(Yval, Yval_predict)
cm_test = confusion_matrix(Ytest, Ytest_predict)
cm = confusion_matrix(Y, Y_predict)
def myfunc(context, data):
    """Vote four classifiers per security and place orders on the outcome.

    For each security in ``context.security_list`` a training set is built
    from 100 daily prices and the security's ``past_data`` field, four
    scikit-learn classifiers are fitted on it, and their vote is combined
    with a 20-bar versus 50-bar mean-price comparison to decide on an order.

    The ``try`` wraps the whole loop, so an exception raised while handling
    one security is printed and ends the pass for the remaining securities.

    Args:
        context: Algorithm state; ``security_list``, ``feature_window``,
            ``portfolio``, ``weight``, ``stop_loss_pct`` and
            ``investment_size`` are read.
        data: Bar data object providing ``history`` and ``current``.
    """
    price_history = data.history(
        context.security_list, fields="price", bar_count=100, frequency="1d")
    try:
        # One full train/predict/trade cycle per security.
        for s in context.security_list:
            start_bar = context.feature_window
            price_list = price_history[s].tolist()
            past = data.current(s, 'past_data')
            pastlist = custom_split(past)

            # Diagnostic output of the raw inputs.
            print(pastlist)
            print(len(past))
            print(len(pastlist))
            print(len(price_list))

            X = []
            y = []
            # One training sample per bar. The features are a 12-element
            # slice of ``pastlist`` ending at the bar; the label says whether
            # the price rose (1) or not (-1) relative to the previous bar.
            for bar in range(start_bar, len(price_list) - 1):
                try:
                    end_price = price_list[bar]
                    start_price = price_list[bar - 1]
                    features = pastlist[(bar - 3) * 4:bar * 4]
                    label = 1 if end_price > start_price else -1
                    X.append(features)
                    y.append(label)
                except Exception as e:
                    print(('feature creation', str(e)))
            print('len(X1)', len(X))

            # The ensemble, in the order it is fitted and queried.
            models = [
                RandomForestClassifier(n_estimators=100),
                LinearSVC(),
                NuSVC(),
                LogisticRegression(),
            ]

            # Append the sample to predict so that it is scaled together
            # with the training rows, then split it off again.
            current_features = pastlist[384:396]
            X.append(current_features)
            print('len(X2)', len(X))

            X = preprocessing.scale(X)
            current_features = X[-1:]
            X = X[:-1]
            print(current_features)
            print('len(X)', len(X))
            print('len(y)', len(y))

            for model in models:
                model.fit(X, y)
            votes = [model.predict(current_features)[0] for model in models]

            # Accept the most common vote only when at least 3 of the 4
            # classifiers agree on it; otherwise stay neutral (0).
            winner, agreeing = Counter(votes).most_common(1)[0]
            p = winner if agreeing >= 3 else 0
            print(('Prediction', p))

            current_price = data.current(s, 'price')
            # Read but not used further in this function.
            current_position = context.portfolio.positions[s].amount
            cash = context.portfolio.cash

            # Mean price over the last 50 and last 20 bars.
            print('price_list', price_list)
            sma_50 = numpy.mean(price_list[-50:])
            sma_20 = numpy.mean(price_list[-20:])
            print('sma_20', sma_20)
            print('sma_50', sma_50)

            open_orders = get_open_orders()

            # Go long on an up vote or a rising short average, short on a
            # down vote or a falling one; never stack onto an open order.
            if p == 1 or sma_20 > sma_50:
                if s not in open_orders:
                    stop = StopOrder(context.stop_loss_pct * current_price)
                    order_target_percent(s, context.weight, style=stop)
                    cash -= context.investment_size
            elif p == -1 or sma_50 > sma_20:
                if s not in open_orders:
                    order_target_percent(s, -context.weight)
    except Exception as e:
        print(str(e))
y_test = labels[272:,i] else: X_train = training y_train = labels[:172,i] X_test = sampletest y_test = labels[172:,i] #best case: 67, 1 posterior = np.empty([100,72,6]) for j in range(1,67): for k in range(1,2): box = np.zeros([6,6]) accuracy = np.zeros(72) for m in range(0,10): nsvc = NuSVC(nu=j/100.0, degree=k) nsvc.fit(X_train, y_train) y_pred = nsvc.predict(X_test) n=0 for i in range(0,len(y_pred)): if y_pred[i] == y_test[i]: #print i, y_pred[i], y_test[i] n = n+1 accuracy[i] = accuracy[i]+1 box[y_test[i]-1,y_pred[i]-1] = box[y_test[i]-1,y_pred[i]-1] + 1 #posterior[m] = knc.predict_proba(X_test) #print j, k, np.mean(accuracy)/0.72, np.std(accuracy)/0.72 print j, k, sum(accuracy[0:8])/8.0, sum(accuracy[8:18])/10.0, sum(accuracy[18:30])/12.0, sum(accuracy[56:72])/16.0, sum(accuracy[30:43])/13.0, sum(accuracy[43:56])/13.0, sum(accuracy)/72.0 ''' means = np.empty([72,6]) stds = np.empty([72,6]) grid = np.empty([6,6])
# One confusion matrix per held-out subject.
cm = [None] * subjects

for subject in range(subjects):
    # Every subject except the held-out one contributes training data.
    train_subjects = [other for other in range(subjects) if other != subject]

    # Lay the training subjects' data side by side in one matrix, one block
    # of TRs columns per subject.
    TRs = image_data_shared[0].shape[1]
    train_data = np.zeros((image_data_shared[0].shape[0], len(train_labels)))
    for train_subject, source_subject in enumerate(train_subjects):
        start_index = train_subject * TRs
        end_index = start_index + TRs
        train_data[:, start_index:end_index] = image_data_shared[source_subject]

    # Train a linear Nu-SVM with scikit-learn; samples are the columns of
    # train_data, hence the transpose.
    classifier = NuSVC(nu=0.5, kernel='linear').fit(train_data.T, train_labels)

    # Score the held-out subject.
    predicted_labels = classifier.predict(image_data_shared[subject].T)
    n_correct = sum(predicted_labels == test_labels)
    accuracy[subject] = n_correct / float(len(predicted_labels))

    # Confusion matrix for this subject, normalised so each row sums to 1.
    raw_cm = confusion_matrix(test_labels, predicted_labels)
    cm[subject] = raw_cm
    cm[subject] = raw_cm.astype('float') / raw_cm.sum(axis=1)[:, np.newaxis]

# Plot and print the results
plot_confusion_matrix(
    cm,
    title="Confusion matrices for different test subjects with Probabilistic SRM")
print("SRM: The average accuracy among all subjects is {0:f} +/- {1:f}".format(
    np.mean(accuracy), np.std(accuracy)))
def final_run():
    """Fit a default NuSVC on the training data and write out its test predictions.

    Reads the module-level ``train``, ``trainLabels`` and ``test`` and hands
    the predicted labels to ``write_out``.
    """
    model = NuSVC()
    model.fit(train, trainLabels)
    write_out(model.predict(test))
def test_full():
    """Fit a default NuSVC on the training data and print how well it fits it.

    The model is evaluated on the same module-level ``train`` data it was
    fitted on; the value returned by ``check_fit`` is printed.
    """
    model = NuSVC()
    model.fit(train, trainLabels)
    train_predictions = model.predict(train)
    print(check_fit(train_predictions, trainLabels))
train_x, test_x = x[train_index], x[test_index] train_y, test_y = y[train_index], y[test_index] gnb.fit(train_x, train_y) gnbrec.append( float((gnb.predict(test_x) == test_y).sum())/ len(test_y) ) nonneg_train_x = train_x - train_x.min() nonneg_test_x = test_x - test_x.min() mnb.fit(nonneg_train_x, train_y) mnbrec.append( float((mnb.predict(nonneg_test_x) == test_y).sum())/ len(test_y) ) bnb.fit(train_x, train_y) bnbrec.append( float((bnb.predict(test_x) == test_y).sum())/ len(test_y) ) svm.fit(train_x, train_y) svmrec.append( float((svm.predict(test_x) == test_y).sum())/ len(test_y) ) _ = PCA(n_components=20).fit(train_x) train_x = _.transform(train_x) test_x = _.transform(test_x) print train_x.shape L = lmnn.fit(train_x, train_y, verbose=True).L lmnnrec.append( knn(np.dot(train_x, L), train_y, np.dot(test_x, L), test_y, K=5) ) print '\tSVM accuracy: {} = {}'.format(svmrec, np.mean(svmrec)) print '\tLMNN accuracy: {} = {}'.format(lmnnrec, np.mean(lmnnrec)) print '\tGaussianNB accuracy: {} = {}'.format(gnbrec, np.mean(gnbrec)) print '\tMultinomiaNB accuracy: {} = {}'.format(mnbrec, np.mean(mnbrec)) print '\tBernoulliNB accuracy: {} = {}'.format(bnbrec, np.mean(bnbrec))
linearSvc = LinearSVC(
    penalty='l2',          # regularisation norm: 'l1' or 'l2'
    # 'hinge' or 'squared_hinge' selects the loss function. 'hinge' is the
    # standard SVM loss (used, for example, by the SVC class), while
    # 'squared_hinge' is the square of 'hinge'.
    loss='squared_hinge',
    # Which optimisation problem to solve. Prefer dual=False when
    # n_samples (number of samples) > n_features (number of attributes).
    dual=False,
    tol=1e-4,
    C=1.0,
    multi_class='ovr',     # 'ovr' or 'crammer_singer'; matters for multi-class problems
    fit_intercept=True,    # whether to compute the intercept
    intercept_scaling=1,
    class_weight=None,
    verbose=0,
    random_state=None,
    max_iter=1000
)

# Fit the three classifiers on the same data, in the original order.
for _estimator in (svc, nuSvc, linearSvc):
    _estimator.fit(X, y)

# Start with a simple prediction for a single point.
Z = np.array([[-1, -2]])
y_svc, y_nuSvc, y_LSvc = [
    _estimator.predict(Z) for _estimator in (svc, nuSvc, linearSvc)
]

# Z[0] was assigned to the first class.
for _prediction in (y_svc, y_nuSvc, y_LSvc):
    print(_prediction)
accu = sum(pred_label == tst_label)/float(len(pred_label)) """ print 'standardization' #print trn_data #print tst_data #trn_data_scaled = preprocessing.scale(trn_data) #tst_data_scaled = preprocessing.scale(tst_data) scaler = preprocessing.StandardScaler().fit(trn_data) trn_data_scaled = scaler.transform(trn_data) tst_data_scaled = scaler.transform(tst_data) #print trn_data_scaled #print tst_data_scaled clf = NuSVC(nu=0.5, kernel = 'linear') clf.fit(trn_data_scaled, trn_label) pred_label = clf.predict(tst_data_scaled) print pred_label print clf.decision_function(tst_data_scaled) accu = sum(pred_label == tst_label)/float(len(pred_label)) if args.align_algo in ['ppca_idvclas','pica_idvclas']: for it in range(11): np.savez_compressed(options['working_path']+opt_group_folder+args.align_algo+'_acc_'+str(it)+'.npz',accu = accu) else: np.savez_compressed(options['working_path']+opt_group_folder+args.align_algo+'_acc_'+str(itr)+'.npz',accu = accu) #np.savez_compressed(options['working_path']+opt_group_folder+args.align_algo+'_acc_'+str(10)+'.npz',accu = accu) print options['working_path']+opt_group_folder+args.align_algo+'_acc_'+str(itr)+'.npz' print np.mean(accu)
accu = sum(pred_label == tst_label)/float(len(pred_label)) """ print 'standardization' #print trn_data #print tst_data #trn_data_scaled = preprocessing.scale(trn_data) #tst_data_scaled = preprocessing.scale(tst_data) scaler = preprocessing.StandardScaler().fit(trn_data) trn_data_scaled = scaler.transform(trn_data) tst_data_scaled = scaler.transform(tst_data) #print trn_data_scaled #print tst_data_scaled clf = NuSVC(nu=0.5, kernel='linear') clf.fit(trn_data_scaled, trn_label) pred_label = clf.predict(tst_data_scaled) print pred_label print clf.decision_function(tst_data_scaled) accu = sum(pred_label == tst_label) / float(len(pred_label)) if args.align_algo in ['ppca_idvclas', 'pica_idvclas']: for it in range(11): np.savez_compressed(options['working_path'] + opt_group_folder + args.align_algo + '_acc_' + str(it) + '.npz', accu=accu) else: np.savez_compressed(options['working_path'] + opt_group_folder + args.align_algo + '_acc_' + str(itr) + '.npz', accu=accu) #np.savez_compressed(options['working_path']+opt_group_folder+args.align_algo+'_acc_'+str(10)+'.npz',accu = accu) print options[
def handle_data(context, data):
    """Trade each stock on a unanimous classifier vote plus a trend filter.

    For every stock in ``context.stocks``, percent-change features are built
    over sliding windows of ``context.feature_window`` daily prices and four
    scikit-learn classifiers are fitted on them. A position is opened only
    when all four classifiers agree on the direction and the 50-day versus
    200-day moving averages point the same way. An exception raised while
    handling one stock is printed and the loop moves on to the next stock.

    Args:
        context: Algorithm state; ``historical_bars``, ``stocks``,
            ``feature_window`` and ``account.leverage`` are read.
        data: Bar data object, indexed by stock to obtain ``mavg``.

    ``history``, ``order_target_percent`` and ``record`` are not defined in
    this function; presumably they are supplied by the hosting backtest
    environment (TODO confirm).
    """
    prices = history(bar_count=context.historical_bars, frequency='1d', field='price')

    # Pre-bind the averages so the record() calls after the loop cannot hit
    # an unbound name when the stock list is empty or the first lookup fails.
    ma1 = ma2 = None

    for stock in context.stocks:
        try:
            # 50- and 200-day moving averages, used below to filter the
            # classifiers' vote by the prevailing trend.
            ma1 = data[stock].mavg(50)
            ma2 = data[stock].mavg(200)

            window = context.feature_window
            price_list = prices[stock].tolist()

            X = []
            y = []

            # Feature creation: one training sample per bar. The features
            # are the rounded percent changes across the ``window`` prices
            # preceding the bar; the label says whether the next bar closed
            # higher (1) or not (-1).
            for bar in range(window, len(price_list) - 1):
                try:
                    end_price = price_list[bar + 1]
                    begin_price = price_list[bar]

                    pricing_list = price_list[bar - window:bar]
                    features = np.around(
                        np.diff(pricing_list) / pricing_list[:-1] * 100.0, 1)

                    label = 1 if end_price > begin_price else -1

                    X.append(features)
                    y.append(label)
                except Exception as e:
                    print(('feature creation', str(e)))

            classifiers = [
                RandomForestClassifier(),
                LinearSVC(),
                NuSVC(),
                LogisticRegression(),
            ]

            # Append the sample to predict so it is scaled together with
            # the training rows, then split it off again.
            last_prices = price_list[-window:]
            current_features = np.around(
                np.diff(last_prices) / last_prices[:-1] * 100.0, 1)

            X.append(current_features)
            X = preprocessing.scale(X)

            # Keep the sample two-dimensional (1 x n_features): predict()
            # expects a 2-D array, and a 1-D row makes it raise, which the
            # blanket except below would turn into "never trade".
            current_features = X[-1:]
            X = X[:-1]

            for clf in classifiers:
                clf.fit(X, y)
            votes = [clf.predict(current_features)[0] for clf in classifiers]

            # Act only on a unanimous vote; otherwise stay neutral (0).
            winner, agreeing = Counter(votes).most_common(1)[0]
            p = winner if agreeing >= 4 else 0

            print(('Prediction', p))

            if p == 1 and ma1 > ma2:
                order_target_percent(stock, 0.33)
            elif p == -1 and ma1 < ma2:
                order_target_percent(stock, -0.33)

        except Exception as e:
            print(str(e))

    # Record the averages of the last stock processed, when available.
    if ma1 is not None and ma2 is not None:
        record('ma1', ma1)
        record('ma2', ma2)
    record('Leverage', context.account.leverage)
print svc_new.score(test_x_reduced, test_y_practice) print 'Predicting' estimator = SelectKBest(score_func=f_classif, k=components) estimator.fit(train_x, train_y_leaderboard) train_x_reduced = estimator.transform(train_x) test_x_reduced = estimator.transform(test_x) print train_x.shape print train_x_reduced.shape #svc_new = SVC(probability=True, C=.000001, kernel='poly', gamma=4, # degree=4) svc_new = NuSVC(kernel='poly', probability=True, gamma=0, nu=.5852, tol=.00001) svc_new.fit(train_x_reduced, train_y_leaderboard) output = svc_new.predict(test_x_reduced) """ """ print 'Outputting' open_file_object = csv.writer(open( "simple" + str(datetime.now().isoformat()) + ".csv", "wb")) open_file_object.writerow(['case_id', 'Target_Leaderboard']) i = 0 for row in entries: open_file_object.writerow([row, output[i].astype(np.uint8)]) i += 1 print 'Done' print datetime.now() - start
# Time = 7276.782202

# Saving data
joblib.dump(clf, learning_model_path)

########### Testing ####################################
from sklearn import metrics
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report

print("Making Testing Data...")
# Parse the header-less CSV once instead of twice: column 0 holds the labels
# and every remaining column is a feature.
test_frame = p.read_csv(filepath_or_buffer=csv_test_path, header=None, sep=',')
test_data = np.array(test_frame.iloc[:, 1:])
test_label = np.ravel(np.array(test_frame.iloc[:, [0]]))

print("Calculating Score...")
predict = clf.predict(test_data)

# Overall accuracy, per-class report and confusion matrix, in that order.
print(accuracy_score(test_label, predict))
print(classification_report(test_label, predict))
print(metrics.confusion_matrix(test_label, predict))

########### Results ####################################
# SVM
max_features=None) clf5.fit(X_train, y_train) pred5 = clf5.predict(X_test) acc5 = accuracy_score(pred5, y_test) cm5 = confusion_matrix(y_test, pred5) f5 = np.sum(cm5, axis=1) cm5 = cm5 / f5 * 100 print('Accuracy: {:.2f}%'.format(acc5 * 100)) fig, ax = plt.subplots(figsize=(8, 6), dpi=100) ax = sns.heatmap(cm5, annot=True, ax=ax, fmt='.2f') from sklearn.svm import NuSVC clf6 = NuSVC() clf6.fit(X_train, y_train) pred6 = clf6.predict(X_test) acc6 = accuracy_score(pred6, y_test) cm6 = confusion_matrix(y_test, pred6) f6 = np.sum(cm6, axis=1) cm6 = cm6 / f6 * 100 print('Accuracy: {:.2f}%'.format(acc6 * 100)) fig, ax = plt.subplots(figsize=(8, 6), dpi=100) ax = sns.heatmap(cm6, annot=True, ax=ax, fmt='.2f') #saving the models clfP = pickle.dumps(clf, open('../data/clf.sav', 'wb')) clfP2 = pickle.dumps(clf2, open('../data/clf2.sav', 'wb')) clfP3 = pickle.dumps(clf3, open('../data/clf3.sav', 'wb')) clfP4 = pickle.dumps(clf4, open('../data/clf4.sav', 'wb')) clfP5 = pickle.dumps(clf5, open('../data/clf5.sav', 'wb'))
# LogisticRegression Classifier: fit, score on the test split, pickle, report.
clf_LogisticRegression = LogisticRegression(n_jobs=-1).fit(X_train, y_train)
predicted_LogisticRegression = clf_LogisticRegression.predict(X_test)
accuracy_LogisticRegression = np.mean(predicted_LogisticRegression == y_test)

# ``with`` closes the file even if pickling raises.
with open("pickled_algos/clf_LogisticRegression.pickle", "wb") as clf_LogisticRegression_f:
    pickle.dump(clf_LogisticRegression, clf_LogisticRegression_f)

print('LogisticRegression accuracy: %s' %accuracy_LogisticRegression)
# classification_report takes (y_true, y_pred); passing them the other way
# round swaps precision with recall and reports support of the predictions.
print(metrics.classification_report(y_test, predicted_LogisticRegression))

#NuSVC Classifier
clf_NuSVC = NuSVC().fit(X_train, y_train)
predicted_NuSVC = clf_NuSVC.predict(X_test)
accuracy_NuSVC = np.mean(predicted_NuSVC == y_test)

with open("pickled_algos/clf_NuSVC.pickle", "wb") as clf_NuSVC_f:
    pickle.dump(clf_NuSVC, clf_NuSVC_f)

print('NuSVC accuracy: %s' %accuracy_NuSVC)
print(metrics.classification_report(y_test, predicted_NuSVC))

#LinearSVC Classifier
clf_LinearSVC = LinearSVC().fit(X_train, y_train)
predicted_LinearSVC = clf_LinearSVC.predict(X_test)
accuracy_LinearSVC = np.mean(predicted_LinearSVC == y_test)
# K-fold evaluation: fit a default NuSVC on each training split and mark in
# ``dum`` (one slot per scored sample, advanced through ``ii``) whether each
# held-out sample was classified correctly.
for train_index, test_index in kf.split(XX):
    #print("TRAIN:", train_index, "TEST:", test_index)
    X_train, X_test = XX[train_index], XX[test_index]
    y_train, y_test = y[train_index], y[test_index]

    clf = NuSVC()
    clf.fit(X_train, y_train)
    # NOTE: a bare ``NuSVC(cache_size=200, ...)`` expression used to follow
    # fit(); it only built and discarded a second estimator, so it is gone.

    y_predict = clf.predict(X_test)

    # Score every held-out sample. The previous bound of
    # ``len(y_predict) - 1`` skipped the last sample of each fold even though
    # the final accuracy is divided by the full ``len(XX)``.
    for predicted, actual in zip(y_predict, y_test):
        dum[ii] = 1 if predicted == actual else 0
        ii = ii + 1

print("Accuracy is")
print((np.sum(dum) / len(XX)) * 100, "%")
if model_type == 'lsi': cs_vec = get_cs_vec(hyp1, hyp2, ref) elif model_type == 'lda': cs_vec = get_cs_vec(hyp1, hyp2, ref) #cs1 = kl_divergence(v_hyp1, v_ref) #cs2 = kl_divergence(v_hyp2, v_ref) pass lm_vec = get_lm_vec(hyp1, hyp2, ref) train_sample = [] train_sample += cs_vec train_sample += lm_vec train_sample += m_vec P.append(train_sample) print 'test samples', np.shape(np.array(P)) print 'training classifiers...' for k in ['rbf', 'linear']: simpleclf = SVC(kernel=k, cache_size=4000) simpleclf.fit(np.array(X), np.array(answers[st:sp]), sample_weight=np.array(sample_weights[st:sp])) scores = cross_validation.cross_val_score(simpleclf, np.array(X), np.array(answers[st:sp])) print 'SVC:', simpleclf.kernel, 'CV(3)',sum(scores) / len(scores) preditions = simpleclf.predict(np.array(P)) np.savetxt('SVC-' + simpleclf.kernel + '.3.pred', preditions, fmt='%d') nuclf = NuSVC(kernel=k, cache_size=4000) nuclf.fit(np.array(X), np.array(answers[st:sp]), sample_weight=np.array(sample_weights[st:sp])) scores = cross_validation.cross_val_score(nuclf, np.array(X), np.array(answers[st:sp])) print 'NuSVC:', nuclf.kernel, 'CV(3)',sum(scores) / len(scores) preditions = nuclf.predict(np.array(P)) np.savetxt('NuSVC-' + nuclf.kernel + '.3.pred', preditions, fmt='%d')