Example #1
def nusvc_model(x_train, y_train, x_val, y_val, x_test, testid):
    scaler = StandardScaler()
    x_stand_train = scaler.fit_transform(x_train)
    x_stand_val = scaler.transform(x_val)
    x_stand_test = scaler.transform(x_test)

    #nus = [0.05,0.1,0.2,0.3,0.4,0.5,0.6]
    #for i in range(len(nus)):
    clf = NuSVC(nu=0.3,
                kernel="linear",
                probability=True,
                decision_function_shape="ovo",
                gamma="scale",
                class_weight="balanced")
    clf.fit(x_stand_train, y_train)  # fit on the standardized features computed above

    y_pred_val = clf.predict(x_stand_val)
    BMAC = balanced_accuracy_score(y_val, y_pred_val)
    print("BMAC of this model: ", BMAC)
    print("\n")
    print("=" * 30)

    y_pred = clf.predict(x_stand_test)

    return y_pred, testid
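The commented-out loop above suggests a nu sweep that was never wired in. A minimal sketch of what that search could look like, reusing the standardized arrays from this function (the helper name is hypothetical):

from sklearn.svm import NuSVC
from sklearn.metrics import balanced_accuracy_score

def sweep_nu(x_stand_train, y_train, x_stand_val, y_val):
    # Evaluate each candidate nu on the validation split and keep the best.
    best_nu, best_bmac = None, -1.0
    for nu in [0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6]:
        try:
            clf = NuSVC(nu=nu, kernel="linear", gamma="scale",
                        class_weight="balanced")
            clf.fit(x_stand_train, y_train)
        except ValueError:
            continue  # some nu values are infeasible for a given class balance
        bmac = balanced_accuracy_score(y_val, clf.predict(x_stand_val))
        if bmac > best_bmac:
            best_nu, best_bmac = nu, bmac
    return best_nu, best_bmac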
Example #2
class RbfSVM:
	def __init__(self):
		self.clf = NuSVC(nu=0.7, kernel='rbf', degree=3, gamma='auto', coef0=0.0, shrinking=True, probability=False, tol=0.001, cache_size=200, verbose=False, max_iter=-1)  # gamma=0.0 meant 'auto' in old scikit-learn versions
		self.pattern ='(?u)\\b[A-Za-z]{3,}'
		self.tfidf = TfidfVectorizer(sublinear_tf=False, use_idf=True, smooth_idf=True, stop_words='english', token_pattern=self.pattern, ngram_range=(1, 3))
	def train(self,fileName):
		print "RbfSVM Classifier is being trained"
		table = pandas.read_table(fileName, sep="\t", names=["cat", "message"])
		X_train = self.tfidf.fit_transform(table.message)
		Y_train = []
		for item in table.cat:
			Y_train.append(int(item)) 
		self.clf.fit(X_train, Y_train)
		print "RbfSVM Classifier has been trained"

	def classify(self,cFileName, rFileName):
		table = pandas.read_table(cFileName, names=["message"])
		X_test = self.tfidf.transform(table.message)
		print "Data have been classified"
		with open(rFileName,'w') as f:
			for item in self.clf.predict(X_test).astype(str):
				f.write(item+'\n')

	def validate(self,fileName):
		table = pandas.read_table(fileName, sep="\t", names=["cat", "message"])
		X_validate = self.tfidf.transform(table.message)
		Y_validated = self.clf.predict(X_validate).astype(str)
		totalNum = len(table.cat)
		errorCount = 0
		for i in range(0,totalNum):
			if int(table.cat[i])!=int(Y_validated[i]):
				errorCount += 1
		print "Data have been validated! Precision={}".format((totalNum-errorCount)/float(totalNum))
Example #3
class svm():
    def __init__(self):
        # self.clf = SVC(kernel='rbf')
        self.clf = NuSVC()

    def train(self, inputs):
        # Parameters:
        #     inputs: An array of Input objects containing input vectors along with their corresponding labels.

        # Creates lists to use for fitting model
        X = []
        Y = []
        for data in inputs:
            X.append((data.x/np.linalg.norm(data.x)))
            Y.append(data.y)
        # Fit model
        self.clf.fit(X, Y)

    def predict(self, input):
        # Parameters:
        #     input: An Input object (or a raw vector) containing an input vector to be used for predicting a label.

        # Check the type before touching .x, and normalize either way.
        if isinstance(input, Input):
            x = input.x / np.linalg.norm(input.x)
        else:
            x = input / np.linalg.norm(input)
        return self.clf.predict([x])  # scikit-learn expects a 2-D array of samples
Example #4
def Nusvc(X, y, xtest):
    print("Nu Support-Vector-Machine")
    from sklearn.svm import NuSVC
    clf = NuSVC(random_state=0, gamma='auto')
    clf.fit(X, y)
    y_pred = clf.predict(xtest)
    return y_pred
Example #5
def fd_svm_time_prior(train, test, ytrain, ytest, seq, k):
    # Sum each window of `seq` consecutive feature rows into a single row,
    # then drop the tail rows that have no complete window.
    for i in range(len(train) - seq + 1):
        for j in range(1, seq):
            train[i] = train[i] + train[i + j]
    train = train[:-seq + 1]
    train = np.array(train).astype('float64')
    train_y = np.array(ytrain[seq - 1:]).astype('float64')  # label of each window's last step
    # Apply the same windowing to the test split.
    for i in range(len(test) - seq + 1):
        for j in range(1, seq):
            test[i] = test[i] + test[i + j]
    test = test[:-seq + 1]
    test = np.array(test).astype('float64')
    test_y = np.array(ytest[seq - 1:]).astype('float64')
    clf = NuSVC()
    clf.fit(train, train_y)
    predict_y = clf.predict(test)
    # return clf.predict(test)
    predict_y = list(predict_y)
    # Time prior: after the first run of k consecutive nonzero predictions,
    # force every later prediction to 1.
    for i in range(len(predict_y) - k + 1):
        if 0 in set(predict_y[i:i + k]):
            continue
        else:
            for j in range(i + k, len(predict_y)):
                predict_y[j] = 1
            break

    # Convert predictions to 1/0 correctness flags; their mean is the accuracy.
    for i in range(len(predict_y)):
        if predict_y[i] == test_y[i]:
            predict_y[i] = 1
        else:
            predict_y[i] = 0
    return np.average(predict_y)
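To make the time-prior rule above concrete, here is a toy run of the k-consecutive smoothing on hand-made predictions (the values are illustrative):

preds = [0, 1, 0, 1, 1, 1, 0, 1]
k = 3
for i in range(len(preds) - k + 1):
    if 0 in set(preds[i:i + k]):
        continue
    # first window of k consecutive nonzero predictions found:
    # force every later prediction to 1 and stop scanning
    for j in range(i + k, len(preds)):
        preds[j] = 1
    break
print(preds)  # -> [0, 1, 0, 1, 1, 1, 1, 1]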
Example #6
def cross_validation(type):

    f1 = 0
    acc = 0
    skf = StratifiedKFold(n_splits=8)

    df_x, df_y, model = tfidf([], [])
    df_x = model

    if type == 'NuSMV':  # compare strings with ==, not 'is'
        clf = NuSVC()
    elif type == 'LinearSMV':
        clf = LinearSVC()
    else:
        clf = DecisionTreeClassifier()

    for train_index, test_index in skf.split(df_x, df_y):
        x_train, x_test = df_x[train_index], df_x[test_index]
        y_train, y_test = df_y[train_index], df_y[test_index]
        clf.fit(x_train, y_train)
        prediction = clf.predict(x_test)
        # print(classification_report(y_test, prediction))
        f1 += f1_score(y_test, prediction, average='weighted')
        acc += accuracy_score(y_test, prediction)

    return f1 / 8, acc / 8
Example #7
def predict_loo(transformed_data, args, trn_label ,tst_label):
  print('imgpred loo', args.loo, end=' ')
  sys.stdout.flush()

  (ndim, nsample , nsubjs) = transformed_data.shape

  loo = args.loo
  loo_idx = list(range(nsubjs))  # list() so that .remove() works on Python 3
  loo_idx.remove(loo)

  #tst_data = np.zeros(shape = (ndim,nsample))
  trn_data = np.zeros(shape = (ndim,(nsubjs-1)*nsample))
  # image stimulus prediction
  # tst_data : ndim x nsample
  tst_data = transformed_data[:,:,loo]

  for m in range(len(loo_idx)):
    trn_data[:,m*nsample:(m+1)*nsample] = transformed_data[:,:,loo_idx[m]]
  
  # scikit-learn svm for classification
  clf = NuSVC(nu=0.5, kernel = 'linear')
  clf.fit(trn_data.T, trn_label)
  pred_label = clf.predict(tst_data.T)
      
  accu = sum(pred_label == tst_label)/float(len(pred_label))

  return accu
Example #8
def predict(transformed_data, args, trn_label, tst_label):
    print('imgpred', end=' ')
    sys.stdout.flush()

    (ndim, nsample, nsubjs) = transformed_data.shape
    accu = np.zeros(shape=nsubjs)

    tst_data = np.zeros(shape=(ndim, nsample))
    trn_data = np.zeros(shape=(ndim, (nsubjs - 1) * nsample))
    # image stimulus prediction
    for tst_subj in range(nsubjs):
        tst_data = transformed_data[:, :, tst_subj]

        trn_subj = list(range(nsubjs))  # list() so that .remove() works on Python 3
        trn_subj.remove(tst_subj)

        for m in range(nsubjs - 1):
            trn_data[:, m * nsample:(m + 1) *
                     nsample] = transformed_data[:, :, trn_subj[m]]

        # scikit-learn svm for classification
        #clf = NuSVC(nu=0.5, kernel = 'linear')
        clf = NuSVC(nu=0.5, kernel='linear')
        clf.fit(trn_data.T, trn_label)

        pred_label = clf.predict(tst_data.T)

        accu[tst_subj] = sum(pred_label == tst_label) / float(len(pred_label))

    return accu
Example #9
def predict(transformed_data, args, trn_label ,tst_label):
  print('imgpred', end=' ')
  sys.stdout.flush()
  
  (ndim, nsample , nsubjs) = transformed_data.shape
  accu = np.zeros(shape=nsubjs)

  tst_data = np.zeros(shape = (ndim,nsample))
  trn_data = np.zeros(shape = (ndim,(nsubjs-1)*nsample))
  # image stimulus prediction 
  for tst_subj in range(nsubjs):
    tst_data = transformed_data[:,:,tst_subj]

    trn_subj = list(range(nsubjs))  # list() so that .remove() works on Python 3
    trn_subj.remove(tst_subj)

    for m in range(nsubjs-1):
      trn_data[:,m*nsample:(m+1)*nsample] = transformed_data[:,:,trn_subj[m]]

    # scikit-learn svm for classification
    #clf = NuSVC(nu=0.5, kernel = 'linear')
    clf = NuSVC(nu=0.5, kernel = 'linear')
    clf.fit(trn_data.T, trn_label)

    pred_label = clf.predict(tst_data.T)
      
    accu[tst_subj] = sum(pred_label == tst_label)/float(len(pred_label))

  return accu
Example #10
def predict_loo(transformed_data, args, trn_label, tst_label):
    print('imgpred loo', args.loo, end=' ')
    sys.stdout.flush()

    (ndim, nsample, nsubjs) = transformed_data.shape

    loo = args.loo
    loo_idx = list(range(nsubjs))  # list() so that .remove() works on Python 3
    loo_idx.remove(loo)

    #tst_data = np.zeros(shape = (ndim,nsample))
    trn_data = np.zeros(shape=(ndim, (nsubjs - 1) * nsample))
    # image stimulus prediction
    # tst_data : ndim x nsample
    tst_data = transformed_data[:, :, loo]

    for m in range(len(loo_idx)):
        trn_data[:,
                 m * nsample:(m + 1) * nsample] = transformed_data[:, :,
                                                                   loo_idx[m]]

    # scikit-learn svm for classification
    clf = NuSVC(nu=0.5, kernel='linear')
    clf.fit(trn_data.T, trn_label)
    pred_label = clf.predict(tst_data.T)

    accu = sum(pred_label == tst_label) / float(len(pred_label))

    return accu
Example #11
def nu(newX, y, newDev, devLabel):
    clNu = NuSVC(gamma='scale')

    clNu.fit(newX, y)

    nuResult = clNu.predict(newDev)

    return nuResult != devLabel  # boolean mask marking misclassified dev samples
Example #12
class Classifier:
	def __init__(self, objective_data, subjective_data):
		OBJECTIVE = 0
		SUBJECTIVE = 1

		self.objective_data = objective_data
		self.subjective_data = subjective_data

		self.text = objective_data + subjective_data

		self.labels = [OBJECTIVE for i in objective_data] + [SUBJECTIVE for i in subjective_data]

		tuple_list = list(zip(self.text, self.labels))  # list() so shuffle works on Python 3

		random.shuffle(tuple_list)

		self.text = [x for x, y in tuple_list]
		self.labels = [y for x, y in tuple_list]  # keep labels aligned with the shuffled text

		self.count_vectorizer = CountVectorizer(stop_words="english", min_df=3)

		# count vectorizer and specific classifier that will be used

		self.counts = self.count_vectorizer.fit_transform(self.text)
		self.classifier = None

		self.tf_transformer = TfidfTransformer(use_idf=True)
		self.frequencies = self.tf_transformer.fit_transform(self.counts)

	def multinomialNB(self):
		self.classifier = MultinomialNB(alpha=.001)
		self.classifier.fit(self.frequencies, self.labels)

	def predict(self, examples):
		example_counts = self.count_vectorizer.transform(examples)
		example_tf = self.tf_transformer.transform(example_counts)
		predictions = self.classifier.predict(example_tf)
		return predictions

	def linearSVC(self):
		self.classifier = LinearSVC()
		self.classifier.fit(self.frequencies, self.labels)

	def nuSVC(self):
		self.classifier = NuSVC()
		self.classifier.fit(self.frequencies, self.labels)

	def accuracy(self, text, labels):
		prediction = self.predict(text)
		correct = 0
		for i in range(len(prediction)):
			if prediction[i] == labels[i]:
				correct += 1
		return correct / float(len(prediction))

	def f1(self, text, actual):
		prediction = self.predict(text)
		return f1_score(actual, prediction)
Example #13
    def predict(self, X):
        if hasattr(self, '_onedal_estimator'):
            logging.info("sklearn.svm.NuSVC.predict: " +
                         get_patch_message("onedal"))
            return self._onedal_estimator.predict(X)
        else:
            logging.info("sklearn.svm.NuSVC.predict: " +
                         get_patch_message("sklearn"))
            return sklearn_NuSVC.predict(self, X)
Example #14
        def optimize_clf(nf, optimize=1):
            # Note: relies on variables from an enclosing scope that is not
            # shown here (ds, chunk, val_chunk, pair_list2).
            acc_list = []  # accuracies for each pair within each LOOCV fold

            def nf_select(nf):
                #fselector = mvpa2.FixedNElementTailSelector(np.round(nf), tail='upper',mode='select', sort=False)
                #sbfs = mvpa2.SensitivityBasedFeatureSelection(mvpa2.OneWayAnova(), fselector, enable_ca=['sensitivities'], auto_train=True)
                if (optimize >= 1):
                    not_test_ds = ds[ds.chunks != chunk]
                    val_ds = not_test_ds[not_test_ds.chunks == val_chunk]
                    train_ds = not_test_ds[not_test_ds.chunks != val_chunk]
                    #sbfs.train(train_ds)
                    #train_ds = sbfs(train_ds)
                    #val_ds = sbfs(val_ds)
                    return train_ds, val_ds
                elif (optimize == 0):
                    train_ds = ds[ds.chunks != chunk]
                    test_ds = ds[ds.chunks == chunk]
                    #sbfs.train(train_ds)
                    #train_ds = sbfs(train_ds)
                    #test_ds = sbfs(test_ds)
                    return train_ds, test_ds

            train_ds, not_train_ds = nf_select(nf)
            for y in range(0, len(pair_list2)):

                def mask(y, train_ds, test_ds):
                    stim_mask1 = (train_ds.targets == pair_list2[y][0]) | (
                        train_ds.targets == pair_list2[y][1])
                    stim_mask2 = (not_train_ds.targets == pair_list2[y][0]) | (
                        not_train_ds.targets == pair_list2[y][1])
                    ds_temp_train = train_ds[stim_mask1]
                    ds_temp_not_train = not_train_ds[stim_mask2]
                    return ds_temp_train, ds_temp_not_train

                ds_temp_train, ds_temp_not_train = mask(
                    y, train_ds, not_train_ds)
                #clf = mvpa2.LinearNuSVMC(nu=0.5)#defines a classifier, linear SVM in this case
                clf = NuSVC(nu=0.5, max_iter=2000)
                #clf = SKLLearnerAdapter(knn)
                #clf = SKLLearnerAdapter(linear_model.SGDClassifier())
                #clf.train(ds_temp_train)
                clf.fit(ds_temp_train.samples, ds_temp_train.targets)
                #predictions = clf.predict(ds_temp_not_train)
                predictions = clf.predict(ds_temp_not_train.samples)
                labels = ds_temp_not_train.targets
                bool_vec = predictions == labels
                acc_list.append(
                    sum(bool_vec) /
                    float(len(bool_vec)))  #array with accuracies for each pair
            if (optimize == 1):
                #print len(acc_list)
                #print np.mean(acc_list)
                return 1 - np.mean(acc_list)
            else:
                #print np.mean(acc_list), 'for chunk:', chunk
                return acc_list
Example #15
def svc(x_train, y_train, x_test, y_test):
    clf = NuSVC()  # class
    clf.fit(x_train, y_train)  # training the svc model
    result = clf.predict(x_test)  # predict the target of testing samples
    predict_list = result.tolist()
    cnt_true = 0
    for i in range(len(y_test)):
        if int(predict_list[i]) == int(y_test[i]):
            cnt_true += 1
    print(float(cnt_true) / float(len(y_test)))
Example #16
class SentimentAnalysis:
    #feature_number=400
    feature_number = 100

    def __init__(self, vec_method="TW", stop_words=()):
        '''
        构造函数
        :param vec_method: 向量化方法: TW, TC, TF, TF-IDF
        :param pos_data: list of sentences
        :param neg_data: list of sentences
        '''
        self.__vec_method = vec_method
        self.__stop_words = stop_words
        #self.__cut_method=cut_method

        #if pos_data and neg_data:
        #    self.load_data(pos_data,neg_data)

    def __get_vectorizer(self):
        __vectorizer_map = {
            'TW': CountVectorizer(binary=True, stop_words=self.__stop_words),
            'TC': CountVectorizer(stop_words=self.__stop_words),
            'TF': TfidfVectorizer(use_idf=False, stop_words=self.__stop_words),
            'TF-IDF': TfidfVectorizer(use_idf=True,
                                      stop_words=self.__stop_words),
        }
        return __vectorizer_map[self.__vec_method]

    '''
    def load_data(self,pos_data,neg_data,stop_words=()):
        self.__pos_data=pos_data
        self.__neg_data=neg_data
        self.__stop_words=stop_words
    '''

    def __vectorize(self, X):
        X = [' '.join(words) for words in X]
        self.__vectorizer = self.__get_vectorizer()
        return self.__vectorizer.fit_transform(X)

    def __feature_selection(self, X, y):
        self.__feature_selector = SelectKBest(chi2, k=self.feature_number)
        X = self.__feature_selector.fit_transform(X, y)
        return X

    def train(self, X, y):
        X = self.__vectorize(X)
        X = self.__feature_selection(X, y)
        self.__clf = NuSVC(nu=0.4, kernel='rbf').fit(X, y)

    def predict(self, X):
        X = [' '.join(words) for words in X]
        X = self.__vectorizer.transform(X)
        X = self.__feature_selector.transform(X)
        return self.__clf.predict(X)
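A hypothetical usage of the class above. train and predict expect lists of tokenized sentences; the toy data is made up, and feature_number is shrunk so SelectKBest fits the tiny vocabulary:

sa = SentimentAnalysis(vec_method="TF-IDF")
sa.feature_number = 4  # the default of 100 would exceed this toy vocabulary
X = [["great", "movie"], ["terrible", "plot"],
     ["loved", "the", "movie"], ["boring", "plot"]]
y = [1, 0, 1, 0]
sa.train(X, y)
print(sa.predict([["great", "plot"]]))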
Example #17
def svc_nu(X_train, categories,X_test, test_categories):
    from sklearn.svm import NuSVC

    svm_nu_classifier = NuSVC().fit(X_train, categories)
    y_svm_predicted = svm_nu_classifier.predict(X_test)
    print('\n Here is the classification report for the support vector machine classifier:')
    print(metrics.classification_report(test_categories, y_svm_predicted))



Example #18
    def train_test_SVM(self, X_train, y_train, X_test, y_test):
        print('Training SVM Classifier')
        svm_classifier = NuSVC()
        svm_classifier.fit(X_train, y_train)

        print('Testing SVM Classifier')
        y_pred = svm_classifier.predict(X_test)

        print(y_pred.shape)
        cm = confusion_matrix(y_test, y_pred)
        print(cm)
Example #19
    def build_single_svm(self, feature_pos, feature_un, neg_indices, label):
        if label != 'tfidf' and label != 'lsi':
            return False

        feature_neg = feature_un[neg_indices]
        feature_left = feature_un[[k for k in range(feature_un.shape[0]) if k not in neg_indices]]

        clf = NuSVC(nu=0.1, kernel='linear', probability=True)
        train_feature = vstack((feature_pos, feature_neg))
        train_target = np.concatenate((np.ones(feature_pos.shape[0]), -np.ones(feature_neg.shape[0])))
        clf.fit(train_feature, train_target)
        if label == 'tfidf':
            joblib.dump(clf, self.clf_tfidf_pos_path)
        else:
            joblib.dump(clf, self.clf_lsi_pos_path)

        logger_info.info(
            str(label) + ' score : ' + str(self.score(clf.predict(feature_pos), train_target[:feature_pos.shape[0]])))

        if self.enable_iteration:
            clf_i = NuSVC(nu=0.1, kernel='linear', probability=True)
            for i in range(self.iteration):
                train_feature = vstack((feature_pos, feature_neg))
                train_target = np.concatenate((np.ones(feature_pos.shape[0]), -np.ones(feature_neg.shape[0])))
                clf_i.fit(train_feature, train_target)
                if feature_left.shape[0] == 0:
                    break
                predicts = clf_i.predict(feature_left)  # presumably the refit model was intended (the original called clf)
                n_indices = [item for item in range(len(predicts)) if predicts[item] != 1]
                p_indices = [item for item in range(len(predicts)) if predicts[item] == 1]
                if len(n_indices) > 0:
                    feature_neg = vstack((feature_neg, feature_un[n_indices]))
                    feature_left = feature_left[p_indices]
                else:
                    break
            recall = self.score(clf_i.predict(feature_pos), np.ones(feature_pos.shape[0]))

            logging.info('recall in train sets is : %f' % recall)
            if recall > 0.95:
                return clf_i
        return clf
Example #20
    def _test_nu_svc(self, num_classes, backend="torch", extra_config={}):
        model = NuSVC()
        np.random.seed(0)
        X = np.random.rand(100, 200)
        X = np.array(X, dtype=np.float32)
        y = np.random.randint(num_classes, size=100)

        model.fit(X, y)
        torch_model = hummingbird.ml.convert(model, backend, X, extra_config=extra_config)

        self.assertTrue(torch_model is not None)
        np.testing.assert_allclose(model.predict(X), torch_model.predict(X), rtol=1e-6, atol=1e-6)
def testing():
    plot_x = range(1, 10)
    plot_y = []
    for i in range(1, 10):
        vals = []
        for _ in range(20):
            train_data, validation_data, train_labels, validation_labels = split_data()
            clf = NuSVC(**get_kwargs(i))
            clf.fit(train_data, train_labels)
            vals.append(check_fit(clf.predict(validation_data), validation_labels))
        plot_y.append(np.mean(vals))

    plot_results(plot_x, plot_y)
def NuSVM(X_train, Y_train, X_test, Y_test):
    """NuSVM Method

    Intended to use OneVsRestClassifier for this multi-class problem
    (a sketch of that wrapping follows this function) and generates
    classification reports for NuSVM.

    Args:
        X_train : The data for trainset
        Y_train : The label for trainset
        X_test : The data for testset
        Y_test : The label for testset
    """
    parameters = {'nu': (0.05, 0.02), 'gamma': [3e-2, 2e-2, 1e-2]}
    svc_clf = NuSVC(nu=0.1, kernel='rbf', verbose=False)
    gs_clf = GridSearchCV(svc_clf, parameters, verbose=False, n_jobs=24)
    gs_clf.fit(X_train, Y_train)  # fit the grid search; fitting the bare estimator would ignore `parameters`

    predicted = gs_clf.predict(X_train)
    print("Train report of NuSVM ======= ")
    print(metrics.classification_report(Y_train, predicted))

    predicted = gs_clf.predict(X_test)
    print("Test report of NuSVM ======= ")
    print(metrics.classification_report(Y_test, predicted))
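The docstring mentions OneVsRestClassifier, but the body never wraps the estimator. A minimal sketch of that wrapping, not the author's code (X_train and Y_train as in the function above):

from sklearn.multiclass import OneVsRestClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.svm import NuSVC

ovr = OneVsRestClassifier(NuSVC(kernel='rbf'))
# inner-estimator parameters are addressed with the estimator__ prefix
gs = GridSearchCV(ovr, {'estimator__nu': (0.05, 0.02),
                        'estimator__gamma': [3e-2, 2e-2, 1e-2]},
                  n_jobs=-1)
gs.fit(X_train, Y_train)
print(gs.best_params_)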
def nusvc_classifier(dir_models, ticket, x, x_test, y, y_test):
    print('getting model...NuSVC')
    clf = NuSVC(nu=0.8)

    print('training...')
    clf.fit(x, y)

    print('predicting...')
    predicted = clf.predict(x_test)
    print(classification_report(y_test, predicted))

    model_id = len(os.listdir(dir_models))  # avoid shadowing the builtin id()
    joblib.dump(clf, dir_models + ticket + '_nusvc_' + str(model_id) + '.pkl')

    return clf.score(x_test, y_test)
def NuSVCMethod(trainData, testData, trainLabel, testLabel):
    info = {'name': 'NuSVCMethod', 'accuracy': 0, 'time': 0, 'remark': ''}
    startTime = time.time()

    from sklearn.svm import NuSVC

    clf = NuSVC()

    clf.fit(trainData, trainLabel)
    labelPred = clf.predict(testData)
    testAccuracy = accuracy_score(testLabel, labelPred)
    # print("SVM Test Accuracy: %.2f%%" % (testAccuracy * 100.0))
    info['time'] = time.time() - startTime
    info['accuracy'] = testAccuracy

    return info
Example #25
def test_nusvc():    
    # print '==== NuSVC ===='
    # print 'Training...'
    clf = NuSVC()
    clf = clf.fit( train_data, train_labels )
    
    # print 'Predicting...'
    output = clf.predict(test_data).astype(int)
    
    predictions_file = open("CLF.csv", "wb")
    open_file_object = csv.writer(predictions_file)
    open_file_object.writerow(["PassengerId","Survived"])
    open_file_object.writerows(zip(test_id, output))
    predictions_file.close()
    # print 'Done.'
    print 'NuSVC : '
Example #26
def nu_support_vector_machines(corpus, documents_training, documents_test, words_features, kernel, nu):
    """
    Another implementation of Support Vector Machines algorithm.
    :param corpus:
    :param documents_training:
    :param documents_test:
    :param words_features:
    :param kernel:
    :param nu:
    :return:
    """

    print()
    print("----- nu-Support Vector Machines algorithm ------")
    print("Creating Training Vectors...")
    categories = util_classify.get_categories(corpus)  

    array_vector_training = []
    array_categories = []
    for (id, original_category, annotations) in documents_training:
        array_vector_training.append(util_classify.transform_document_in_vector(annotations, words_features, corpus))
        array_categories.append(util_classify.get_categories(corpus).index(original_category))    
        
    print "Training the algorithm..."
    classifier = NuSVC(nu=nu, kernel=kernel)

    X_train_features = []
    y_train_categories = []
    # Train all
    for (id, original_category, annotations) in documents_training:
        X_train_features.append(util_classify.transform_document_in_vector(annotations, words_features, corpus))
        y_train_categories.append(original_category)

    classifier.fit(np.array(X_train_features), np.array(y_train_categories))    

    print "Calculating metrics..."
    estimated_categories = []
    original_categories = []

    for (id, cat_original, annotations) in documents_test:
        vector = util_classify.transform_document_in_vector(annotations, words_features, corpus)
        cat_estimated = classifier.predict(np.array([vector]))[0]  # predict expects a 2-D array; take the single label
        estimated_categories.append(categories.index(cat_estimated))
        original_categories.append(categories.index(cat_original))

    return original_categories, estimated_categories
Example #27
    def predict(self):
        """
         trains the scikit-learn  python machine learning algorithm library function
         https://scikit-learn.org

         then passes the trained algorithm the features set and returns the
         predicted y test values form, the function

         then compares the y_test values from scikit-learn predicted to
         y_test values passed in

         then returns the accuracy
         """
        algorithm = NuSVC()
        algorithm.fit(self.X_train, self.y_train)
        y_pred = list(algorithm.predict(self.X_test))
        self.acc = OneHotPredictor.get_accuracy(y_pred, self.y_test)
        return self.acc
Example #28
def prediction(parent_text, child_text, type):

    pair = parent_text + " STOP " + child_text

    df_x, df_y, model = tfidf([pair], [])

    if type == 'NuSMV':  # compare strings with ==, not 'is'
        clf = NuSVC()
    elif type == 'LinearSMV':
        clf = LinearSVC()
    else:
        clf = DecisionTreeClassifier()

    clf.fit(model[1:], df_y)
    prediction = clf.predict(model[0]).tolist()
    if prediction[0] == 0:
        return "Attack"
    else:
        return "Support"
Example #29
def text_spam(text):
    raw_data = pd.read_excel("hindi_spam.xlsx")
    E_mails = raw_data
    i = 0
    for e in E_mails['text']:
        E_mails.text[i] = ''.join(list(map(purify, e)))
        E_mails.text[i] = E_mails.text[i].split()
        i = i + 1

    E_mails['text'] = list(map(rem_stopwords, E_mails['text']))

    email = []
    email = (list(map(hi_stem, E_mails['text'])))
    E_mails['text'] = email

    for i in range(0, E_mails.shape[0]):
        E_mails['text'][i] = ' '.join(E_mails['text'][i])

    transformer2 = TfidfVectorizer(ngram_range=(1, 1))
    counts2 = transformer2.fit_transform(E_mails['text'])

    NBModel = BernoulliNB().fit(counts2, E_mails['type'])
    SVCModel = SVC(kernel='linear').fit(counts2, E_mails['type'])
    NuSVCModel = NuSVC(kernel='linear').fit(counts2, E_mails['type'])
    RFModel = RandomForestClassifier(n_estimators=50, min_samples_split=3).fit(
        counts2, E_mails['type'])
    GBModel = GradientBoostingClassifier(n_estimators=50,
                                         min_samples_split=200).fit(
                                             counts2, E_mails['type'])

    counts1 = transformer2.transform([text])

    NBpred = NBModel.predict(counts1)
    SVCpred = SVCModel.predict(counts1)
    NuSVCpred = NuSVCModel.predict(counts1)
    RFpred = RFModel.predict(counts1)
    GBpred = GBModel.predict(counts1)

    pred_list = [NBpred, SVCpred, NuSVCpred, RFpred, GBpred]
    pred = max(pred_list, key=pred_list.count)

    return pred[0]
Example #30
File: nu_svc.py  Project: lnxpy/lale
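# Note: in lale's wrapper modules, Op is conventionally an alias for the
# wrapped scikit-learn estimator (here, presumably sklearn.svm.NuSVC).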
class NuSVCImpl:
    def __init__(self, **hyperparams):
        self._hyperparams = hyperparams
        self._wrapped_model = Op(**self._hyperparams)

    def fit(self, X, y=None):
        if y is not None:
            self._wrapped_model.fit(X, y)
        else:
            self._wrapped_model.fit(X)
        return self

    def predict(self, X):
        return self._wrapped_model.predict(X)

    def predict_proba(self, X):
        return self._wrapped_model.predict_proba(X)

    def decision_function(self, X):
        return self._wrapped_model.decision_function(X)
Example #31
def classifier_prediction(docs, labels, documents):
    classifier_MNB = MultinomialNB().fit(docs, labels)
    classifier_BNB = BernoulliNB().fit(docs, labels)
    classifier_LR = LogisticRegression().fit(docs, labels)
    classifier_SGDC = SGDClassifier().fit(docs, labels)
    classifier_SVC = SVC().fit(docs, labels)
    classifier_LSVC = LinearSVC().fit(docs, labels)
    classifier_NuSVC = NuSVC().fit(docs, labels)

    egs = documents

    egs_count = vectorizer.transform(egs)

    #					Classifier Predictions 		 			  #
    ###############################################################
    predicted_MNB = classifier_MNB.predict(egs_count)
    predicted_BNB = classifier_BNB.predict(egs_count)
    predicted_LR = classifier_LR.predict(egs_count)
    predicted_SGDC = classifier_SGDC.predict(egs_count)
    predicted_SVC = classifier_SVC.predict(egs_count)
    predicted_LSVC = classifier_LSVC.predict(egs_count)
    predicted_NuSVC = classifier_NuSVC.predict(egs_count)
    ################################################################

    #										Classifications																		#
    #############################################################################################################################
    print("Multinomial Naive Bayes :- " + str(predicted_MNB))
    print
    print("Bernoulli Naive Bayes :- " + str(predicted_BNB))
    print
    print("Logistic Regression :- " + str(predicted_LR))
    print
    print("SGDC :- " + str(predicted_SGDC))
    print
    print("SVC :- " + str(predicted_SVC))
    print
    print("Linear SVC :- " + str(predicted_LSVC))
    print
    print("Nu SVC :- " + str(predicted_NuSVC))
Example #32
def main():
    start = time.time()
    trainX, trainY = loadImgFeature('trainingSet')
    testX, testY = loadImgFeature('testSet')
    clf = NuSVC()
    clf.fit(trainX,trainY)
    Z = clf.predict(testX)
    print("the total error rate is"+str(sum(Z!=testY) / float(len(testY))))

    error0,error1,total0,total1 = 0,0,0,0
    for i in range(len(Z)):
        if testY[i] == 0:
            total0 += 1
            if Z[i] != 0:
                error0 += 1
        else:
            total1 += 1
            if Z[i] != 1:
                error1 += 1
    print("\nthe total number of positive sample is %d,the positive sample error rate is %f." % (
    total1, error1 / float(total1)))
    print("\nthe total number of negative sample is %d,the negative sample error rate is %f." % (
    total0, error0 / float(total0)))
    print("spend time:%ss."%(time.time()-start))
Example #33
# Make a model with the best parameters
estimator = NuSVC(kernel='rbf', gamma=clf.best_estimator_.gamma,
                  nu=clf.best_estimator_.nu)
                # C=clf.best_estimator_.C)
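For context, clf here is presumably a fitted GridSearchCV from earlier in the script; a hypothetical sketch of how it might have been produced (parameter values are illustrative):

from sklearn.model_selection import GridSearchCV
from sklearn.svm import NuSVC

param_grid = {'nu': [0.2, 0.5, 0.8], 'gamma': [1e-3, 1e-2, 1e-1]}
clf = GridSearchCV(NuSVC(kernel='rbf'), param_grid, cv=cv, n_jobs=4)
clf.fit(X_train, y_train)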

# Plot the learning curve to find a good split
title = 'NuSVC'
plot_learning_curve(estimator, title, X_train, y_train, cv=cv, n_jobs=4)
p.savefig("supervised_learning_nusvc.pdf")

# Find a good number of test samples before moving on
# raw_input("Continue??")

# With a good number of test samples found, predict the whole set to the model
estimator.fit(X_train, y_train)
y_pred = estimator.predict(X_all)
DataFrame(y_pred).to_csv("supervised_prediction_labels_nusvc.csv")
print(classification_report(y_all, y_pred))
print "Best params are:" + str(clf.best_params_)
# Hold here
raw_input("Continue??")


# Now take the model found, and find the outliers

outlier_percent = 0.01

## FIGURE OUT WHAT TO DO HERE!!


# Use dim reduction to look at the space.
for row in spamreader:
    # print(len(row))
    if len(row) == 11 and "?" not in row:
        x.append(row[1:10])
        y.append(int(row[10]))
for i in x:
    for j in i:
        temp.append(int(j))
    z.append(temp)
    temp = []
########################################################################
#NuSVM classifier
from sklearn.svm import NuSVC
clf = NuSVC()
clf.fit(z[1:200], y[1:200])
valid = clf.predict(z[201:698])      
for i in valid:
    if i != y[count+201]:
        mis+=1
    count+=1
print("NuSVM misclassification rate is")
print(float(float(mis)/498) * 100)
#########################################################################
#Random Forest
from sklearn.ensemble import RandomForestClassifier
mis = 0
count=0
clf1 = RandomForestClassifier(n_estimators=10)
clf1.fit(z[1:200], y[1:200])
# no-op: constructing a classifier without assigning or fitting it has no effect
# RandomForestClassifier(n_estimators=10, max_depth=None,
#                        min_samples_split=1, random_state=0)
def nuSupportVectorClassification(dataFile, outputFolder, NU, kernelType, numDegree, numGamma, coef, isShrink, isProb, tolerance, cacheSize, classWeight, isVerbose, maxIter, decisionShape, randomState,parameters):
	inputData = yaml.safe_load(open(dataFile))  # yaml.load without a Loader is unsafe and deprecated
	trainingSet = inputData['training']
	testingSet = inputData['testing']
	inputFile = inputData['inputFile']
	label = inputData['label']
	resultSet = []
	if not os.path.exists(outputFolder):
		try:
			os.makedirs(outputFolder)
		except OSError as exc:
			if exc.errno != errno.EEXIST:
				raise exc
			pass
	for i in range(len(trainingSet)):
		"""testPredictions = []
		trainLabels = []
		trainFeatures = []
		trainDataSet = arff.load(trainingSet[i])
		for row in trainDataSet:
			content = list(row)
			trainFeatures.append(content[0:len(content)-1])
			trainLabels.append(content[len(content)-1])
		testFeatures = []
		testLabels = []
		testDataSet = arff.load(testingSet[i])
		for row in testDataSet:
			content = list(row)
			testFeatures.append(content[0:len(content)-1])
			testLabels.append(content[len(content)-1])"""
		train_df = pd.read_csv(trainingSet[i])
		train_labels = train_df[label]
		train_features = train_df.drop(label,axis=1)
		test_df = pd.read_csv(testingSet[i])
		test_predictions = pd.DataFrame(test_df[label])
		test_features = test_df.drop(label,axis=1)

		svm = NuSVC(nu=NU, kernel=kernelType, degree=numDegree, gamma=numGamma, coef0=coef, shrinking=isShrink, probability=isProb, tol=tolerance, cache_size=cacheSize, class_weight=classWeight, verbose=isVerbose, max_iter=maxIter, decision_function_shape=decisionShape, random_state=randomState)
		svm.fit(train_features, train_labels)
		test_predictions['predictions'] = svm.predict(test_features)
		#testPredictions = np.array(svm.predict(testFeatures)).tolist()
		resultFile = outputFolder + '/result' + str(i + 1) + '.csv'
		"""with open(resultFile,'w') as outfile:
			outfile.write('predictions:\n')
			outfile.write(yaml.dump(testPredictions, default_flow_style=False))
			outfile.write('true_labels:\n')
			outfile.write(yaml.dump(testLabels, default_flow_style=False))"""
		test_predictions.to_csv(resultFile,index=False)
		resultSet.append(resultFile)
	resultDict = dict()
	#parameters = dict()
	resultDict['results'] = resultSet
	resultDict['label'] = label
	"""parameters['parameter.n'] = NU
	parameters['parameter.k'] = kernelType
	parameters['parameter.d'] = numDegree
	parameters['parameter.g'] = numGamma
	parameters['parameter.f'] = coef
	parameters['parameter.s'] = isShrink
	parameters['parameter.p'] = isProb
	parameters['parameter.t'] = tolerance
	parameters['parameter.a'] = cacheSize
	parameters['parameter.w'] = classWeight
	parameters['parameter.v'] = isVerbose
	parameters['parameter.i'] = maxIter
	parameters['parameter.e'] = decisionShape
	parameters['parameter.r'] = randomState"""
	if not parameters:
		parameters['parameter']='default'
	resultDict['algo_params'] = parameters
	resultDict['split_params'] = inputData['split_params']
	if 'feature_selection_parameters' in inputData:
		resultDict['feature_selection_parameters'] = inputData['feature_selection_parameters']
		resultDict['feature_selection_algorithm'] = inputData['feature_selection_algorithm']
	if 'feature_extraction_parameters' in inputData:
		resultDict['feature_extraction_parameters'] = inputData['feature_extraction_parameters']
		resultDict['feature_extraction_algorithm'] = inputData['feature_extraction_algorithm']
	if 'preprocessing_params' in inputData:
		resultDict['preprocessing_params'] = inputData['preprocessing_params']
	resultDict['inputFile'] = inputFile
	resultDict['algorithm'] = "NuSupportVectorClassification"
	yaml.dump(resultDict, open(outputFolder + '/results.yaml', 'w'))
Ytest = np.ones((testshape[0]))
Ytest_predict = np.ones((testshape[0]))
Ytest[0:100] = 1
Ytest[100:200] = 2
Ytest[200:300] = 3

clf = NuSVC()
clf.fit(Xtrain, Ytrain)
# no-op: a bare constructor call has no effect; note that scikit-learn's
# kernel name is 'poly', not 'polynomial'
# NuSVC(cache_size=2000, class_weight=None, coef0=0.0,
#       decision_function_shape=None, degree=3, gamma='auto', kernel='poly',
#       max_iter=-1, nu=0.5, probability=False, random_state=None,
#       shrinking=True, tol=0.00001, verbose=False)

for i in range(trainshape[0]):
    Ytrain_predict[i] = clf.predict([Xtrain[i, :]])

for i in range(valshape[0]):
    Yval_predict[i] = clf.predict([Xval[i, :]])

for i in range(testshape[0]):
    Ytest_predict[i] = clf.predict([Xtest[i, :]])

Y = np.concatenate((Ytrain, Yval, Ytest))
Y_predict = np.concatenate((Ytrain_predict, Yval_predict,
                            Ytest_predict))

cm_train = confusion_matrix(Ytrain, Ytrain_predict)
cm_val = confusion_matrix(Yval, Yval_predict)
cm_test = confusion_matrix(Ytest, Ytest_predict)
cm = confusion_matrix(Y, Y_predict)
Example #37
def myfunc(context, data):
    price_history = data.history(context.security_list,
                                 fields="price",
                                 bar_count=100,
                                 frequency="1d")

    try:
        # For loop for each stock traded everyday:
        for s in context.security_list:

            start_bar = context.feature_window
            price_list = price_history[s].tolist()
            past = data.current(s, 'past_data')
            pastlist = custom_split(past)
            #print isinstance(past, str)
            #print isinstance(custom_split(past), list)

            print(pastlist)
            print(len(past))
            print(len(pastlist))
            print(len(price_list))
            #print past[1:-1]

            X = []
            y = []

            bar = start_bar

            # Loop for each machine learning data set
            while bar < len(price_list) - 1:

                # print s," price: ",data.history(s, 'price', 100 , "1d")
                try:
                    end_price = price_list[bar]
                    start_price = price_list[bar - 1]

                    features = pastlist[(bar - 3) * 4:bar * 4]
                    # Features are the attribute values used for machine learning.
                    #print(features)

                    if end_price > start_price:
                        label = 1
                    else:
                        label = -1
                    # Label is the indicator of whether this stock will rise or fall
                    bar += 1

                    X.append(features)
                    y.append(label)

                    #print X
                    #print y

                except Exception as e:

                    bar += 1
                    print(('feature creation', str(e)))

            print('len(X1)', len(X))

            # Call the machined learning model
            clf1 = RandomForestClassifier(n_estimators=100)
            clf2 = LinearSVC()
            clf3 = NuSVC()
            clf4 = LogisticRegression()

            # Prepare the attribute information for prediction
            current_features = pastlist[384:396]

            X.append(current_features)
            print('len(X2)', len(X))

            # Rescale all the data
            X = preprocessing.scale(X)

            current_features = X[-1:]
            X = X[:-1]

            print(current_features)
            print('len(X)', len(X))
            print('len(y)', len(y))

            # Build the model
            clf1.fit(X, y)
            clf2.fit(X, y)
            clf3.fit(X, y)
            clf4.fit(X, y)

            # Predict the results
            p1 = clf1.predict(current_features)[0]
            p2 = clf2.predict(current_features)[0]
            p3 = clf3.predict(current_features)[0]
            p4 = clf4.predict(current_features)[0]

            # If at least 3 of the 4 predictions agree, take that majority label
            # (see the VotingClassifier sketch after this function).
            if Counter([p1, p2, p3, p4]).most_common(1)[0][1] >= 3:
                p = Counter([p1, p2, p3, p4]).most_common(1)[0][0]

            else:
                p = 0

            print(('Prediction', p))

            current_price = data.current(s, 'price')
            current_position = context.portfolio.positions[s].amount
            cash = context.portfolio.cash

            # Add one more feature: moving average
            print('price_list', price_list)
            sma_50 = numpy.mean(price_list[-50:])
            sma_20 = numpy.mean(price_list[-20:])
            print('sma_20', sma_20)
            print('sma_50', sma_50)

            open_orders = get_open_orders()

            # Everyday's trading activities:
            if (p == 1) or (sma_20 > sma_50):
                if s not in open_orders:
                    order_target_percent(
                        s,
                        context.weight,
                        style=StopOrder(context.stop_loss_pct * current_price))
                    cash -= context.investment_size
            elif (p == -1) or (sma_50 > sma_20):
                if s not in open_orders:
                    order_target_percent(s, -context.weight)

    except Exception as e:
        print(str(e))
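The hand-rolled 3-of-4 vote above can also be expressed with scikit-learn's VotingClassifier; a sketch (not the author's code), with the caveat that hard voting always returns a label and never abstains the way p = 0 does:

from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import LinearSVC, NuSVC

vote = VotingClassifier(estimators=[
    ('rf', RandomForestClassifier(n_estimators=100)),
    ('lsvc', LinearSVC()),
    ('nusvc', NuSVC()),
    ('lr', LogisticRegression()),
], voting='hard')
vote.fit(X, y)          # X, y as built in the loop above
p = vote.predict(current_features)[0]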
Example #38
     y_test = labels[272:,i]
 else:
     X_train = training
     y_train = labels[:172,i]
     X_test = sampletest
     y_test = labels[172:,i]
 #best case: 67, 1
 posterior = np.empty([100,72,6])
 for j in range(1,67):
     for k in range(1,2):
         box = np.zeros([6,6])
         accuracy = np.zeros(72)
         for m in range(0,10):
             nsvc = NuSVC(nu=j/100.0, degree=k)
             nsvc.fit(X_train, y_train)
             y_pred = nsvc.predict(X_test)
             
             n = 0
             for i in range(0, len(y_pred)):
                 if y_pred[i] == y_test[i]:
                     # print(i, y_pred[i], y_test[i])
                     n = n + 1
                     accuracy[i] = accuracy[i] + 1
                 box[y_test[i]-1, y_pred[i]-1] = box[y_test[i]-1, y_pred[i]-1] + 1
             # posterior[m] = knc.predict_proba(X_test)
         # print j, k, np.mean(accuracy)/0.72, np.std(accuracy)/0.72
         print(j, k, sum(accuracy[0:8])/8.0, sum(accuracy[8:18])/10.0, sum(accuracy[18:30])/12.0, sum(accuracy[56:72])/16.0, sum(accuracy[30:43])/13.0, sum(accuracy[43:56])/13.0, sum(accuracy)/72.0)
 means = np.empty([72,6])
 stds = np.empty([72,6])
 grid = np.empty([6,6])
Example #39
    cm = [None] * subjects
    for subject in range(subjects):
        # Concatenate the subjects' data for training into one matrix
        train_subjects = list(range(subjects))
        train_subjects.remove(subject)
        TRs = image_data_shared[0].shape[1]
        train_data = np.zeros((image_data_shared[0].shape[0], len(train_labels)))
        for train_subject in range(len(train_subjects)):
            start_index = train_subject*TRs
            end_index = start_index+TRs
            train_data[:, start_index:end_index] = image_data_shared[train_subjects[train_subject]]
    
        # Train a Nu-SVM classifier using scikit learn
        classifier = NuSVC(nu=0.5, kernel='linear')
        classifier = classifier.fit(train_data.T, train_labels)
    
        # Predict on the test data
        predicted_labels = classifier.predict(image_data_shared[subject].T)
        accuracy[subject] = sum(predicted_labels == test_labels)/float(len(predicted_labels))
    
        # Create a confusion matrix to see the accuracy of each class
        cm[subject] = confusion_matrix(test_labels, predicted_labels)
    
        # Normalize the confusion matrix
        cm[subject] = cm[subject].astype('float') / cm[subject].sum(axis=1)[:, np.newaxis]
    
    
    # Plot and print the results
    plot_confusion_matrix(cm, title="Confusion matrices for different test subjects with Probabilistic SRM")
    print("SRM: The average accuracy among all subjects is {0:f} +/- {1:f}".format(np.mean(accuracy), np.std(accuracy)))
def final_run():
    clf = NuSVC()
    clf.fit(train, trainLabels)
    predict_labels = clf.predict(test)
    write_out(predict_labels)
def test_full():
    clf = NuSVC()
    clf.fit(train, trainLabels)
    print(check_fit(clf.predict(train), trainLabels))
Example #42
        train_x, test_x = x[train_index], x[test_index]
        train_y, test_y = y[train_index], y[test_index]

        gnb.fit(train_x, train_y)
        gnbrec.append( float((gnb.predict(test_x) == test_y).sum())/ len(test_y) )

        nonneg_train_x = train_x - train_x.min()
        nonneg_test_x = test_x - test_x.min()
        mnb.fit(nonneg_train_x, train_y)
        mnbrec.append( float((mnb.predict(nonneg_test_x) == test_y).sum())/ len(test_y) )

        bnb.fit(train_x, train_y)
        bnbrec.append( float((bnb.predict(test_x) == test_y).sum())/ len(test_y) )

        svm.fit(train_x, train_y)
        svmrec.append( float((svm.predict(test_x) == test_y).sum())/ len(test_y) )

        _ = PCA(n_components=20).fit(train_x)
        train_x = _.transform(train_x)
        test_x = _.transform(test_x)
        print(train_x.shape)
        L = lmnn.fit(train_x, train_y, verbose=True).L
        lmnnrec.append( knn(np.dot(train_x, L), train_y, np.dot(test_x, L), test_y, K=5) )

    print('\tSVM accuracy: {} = {}'.format(svmrec, np.mean(svmrec)))
    print('\tLMNN accuracy: {} = {}'.format(lmnnrec, np.mean(lmnnrec)))
    print('\tGaussianNB accuracy: {} = {}'.format(gnbrec, np.mean(gnbrec)))
    print('\tMultinomialNB accuracy: {} = {}'.format(mnbrec, np.mean(mnbrec)))
    print('\tBernoulliNB accuracy: {} = {}'.format(bnbrec, np.mean(bnbrec)))

Example #43
linearSvc = LinearSVC(
            penalty='l2',            # 'l1' or 'l2'
            loss='squared_hinge',    # 'hinge' or 'squared_hinge'; 'hinge' is the standard SVM loss (as used by SVC), 'squared_hinge' is its square
            dual=False,              # which optimization problem to solve; prefer dual=False when n_samples > n_features
            tol=1e-4,
            C=1.0,
            multi_class='ovr',       # 'ovr' or 'crammer_singer'; only matters for multi-class problems
            fit_intercept=True,      # whether to fit an intercept
            intercept_scaling=1,
            class_weight=None,
            verbose=0,
            random_state=None,
            max_iter=1000
            )
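The calls below also use svc and nuSvc, which were presumably defined in a truncated part of the source; a plausible reconstruction (the configurations are assumptions):

from sklearn.svm import SVC, NuSVC
svc = SVC(kernel='rbf', gamma='scale')              # assumed configuration
nuSvc = NuSVC(nu=0.5, kernel='rbf', gamma='scale')  # assumed configuration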



svc.fit(X,y)
nuSvc.fit(X,y)
linearSvc.fit(X,y)

# Let's start with a simple prediction
Z=np.array([[-1,-2]])
y_svc=svc.predict(Z)
y_nuSvc=nuSvc.predict(Z)
y_LSvc=linearSvc.predict(Z)
# Z[0] is assigned to the first class
print(y_svc)
print(y_nuSvc)
print(y_LSvc)
Example #44
        # accu = sum(pred_label == tst_label) / float(len(pred_label))

    print('standardization')
    #print trn_data
    #print tst_data
    #trn_data_scaled = preprocessing.scale(trn_data)
    #tst_data_scaled = preprocessing.scale(tst_data)
    scaler = preprocessing.StandardScaler().fit(trn_data)
    trn_data_scaled = scaler.transform(trn_data)
    tst_data_scaled = scaler.transform(tst_data)
    #print trn_data_scaled
    #print tst_data_scaled
    clf = NuSVC(nu=0.5, kernel = 'linear')
    clf.fit(trn_data_scaled, trn_label)
    pred_label = clf.predict(tst_data_scaled)
    print(pred_label)
    print(clf.decision_function(tst_data_scaled))
    accu = sum(pred_label == tst_label)/float(len(pred_label))


    
    if args.align_algo in ['ppca_idvclas','pica_idvclas']:
        for it in range(11):
            np.savez_compressed(options['working_path']+opt_group_folder+args.align_algo+'_acc_'+str(it)+'.npz',accu = accu)
    else:
        np.savez_compressed(options['working_path']+opt_group_folder+args.align_algo+'_acc_'+str(itr)+'.npz',accu = accu)
        #np.savez_compressed(options['working_path']+opt_group_folder+args.align_algo+'_acc_'+str(10)+'.npz',accu = accu)
    print(options['working_path']+opt_group_folder+args.align_algo+'_acc_'+str(itr)+'.npz')
    print(np.mean(accu))
Example #45
        # accu = sum(pred_label == tst_label) / float(len(pred_label))

    print('standardization')
    #print trn_data
    #print tst_data
    #trn_data_scaled = preprocessing.scale(trn_data)
    #tst_data_scaled = preprocessing.scale(tst_data)
    scaler = preprocessing.StandardScaler().fit(trn_data)
    trn_data_scaled = scaler.transform(trn_data)
    tst_data_scaled = scaler.transform(tst_data)
    #print trn_data_scaled
    #print tst_data_scaled
    clf = NuSVC(nu=0.5, kernel='linear')
    clf.fit(trn_data_scaled, trn_label)
    pred_label = clf.predict(tst_data_scaled)
    print(pred_label)
    print(clf.decision_function(tst_data_scaled))
    accu = sum(pred_label == tst_label) / float(len(pred_label))

    if args.align_algo in ['ppca_idvclas', 'pica_idvclas']:
        for it in range(11):
            np.savez_compressed(options['working_path'] + opt_group_folder +
                                args.align_algo + '_acc_' + str(it) + '.npz',
                                accu=accu)
    else:
        np.savez_compressed(options['working_path'] + opt_group_folder +
                            args.align_algo + '_acc_' + str(itr) + '.npz',
                            accu=accu)
        #np.savez_compressed(options['working_path']+opt_group_folder+args.align_algo+'_acc_'+str(10)+'.npz',accu = accu)
    print(options['working_path'] + opt_group_folder + args.align_algo + '_acc_' + str(itr) + '.npz')
Example #46
def handle_data(context, data):
    prices = history(bar_count = context.historical_bars, frequency='1d', field='price')

    for stock in context.stocks:
        try:
            # create moving averages for 50 and 200 days to filter the results that we want
            # to get out of the nueral network.
            ma1 = data[stock].mavg(50)
            ma2 = data[stock].mavg(200)

            start_bar = context.feature_window
            price_list = prices[stock].tolist()

            X = []
            y = []

            bar = start_bar

            # feature creation
            # this is where I build out the models (SVMs, forest, logistic
            # regression) that learn from the history of the stocks.
            while bar < len(price_list)-1:
                try:
                    end_price = price_list[bar+1]
                    begin_price = price_list[bar]

                    pricing_list = []
                    xx = 0
                    for _ in range(context.feature_window):
                        price = price_list[bar-(context.feature_window-xx)]
                        pricing_list.append(price)
                        xx += 1

                    features = np.around(np.diff(pricing_list) / pricing_list[:-1] * 100.0, 1)


                    # print(features)

                    if end_price > begin_price:
                        label = 1
                    else:
                        label = -1

                    bar += 1
                    X.append(features)
                    y.append(label)

                except Exception as e:
                    bar += 1
                    print(('feature creation',str(e)))




            clf1 = RandomForestClassifier()
            clf2 = LinearSVC()
            clf3 = NuSVC()
            clf4 = LogisticRegression()

            last_prices = price_list[-context.feature_window:]
            current_features = np.around(np.diff(last_prices) / last_prices[:-1] * 100.0, 1)

            X.append(current_features)
            X = preprocessing.scale(X)

            current_features = X[-1:]  # keep a 2-D shape so predict() accepts it
            X = X[:-1]

            clf1.fit(X,y)
            clf2.fit(X,y)
            clf3.fit(X,y)
            clf4.fit(X,y)

            p1 = clf1.predict(current_features)[0]
            p2 = clf2.predict(current_features)[0]
            p3 = clf3.predict(current_features)[0]
            p4 = clf4.predict(current_features)[0]
            
            
            if Counter([p1,p2,p3,p4]).most_common(1)[0][1] >= 4:
                p = Counter([p1,p2,p3,p4]).most_common(1)[0][0]
                
            else:
                p = 0
                
            print(('Prediction',p))


            if p == 1 and ma1 > ma2:
                order_target_percent(stock,0.33)
            elif p == -1 and ma1 < ma2:
                order_target_percent(stock,-0.33)      

        except Exception as e:
            print(str(e))
            
            
    record('ma1',ma1)
    record('ma2',ma2)
    record('Leverage',context.account.leverage)
Example #47
print(svc_new.score(test_x_reduced, test_y_practice))


print('Predicting')
estimator = SelectKBest(score_func=f_classif, k=components)
estimator.fit(train_x, train_y_leaderboard)
train_x_reduced = estimator.transform(train_x)
test_x_reduced = estimator.transform(test_x)
print(train_x.shape)
print(train_x_reduced.shape)

#svc_new = SVC(probability=True, C=.000001, kernel='poly', gamma=4,
#                  degree=4)
svc_new = NuSVC(kernel='poly', probability=True, gamma='auto', nu=.5852, tol=.00001)  # gamma=0 meant 'auto' in old scikit-learn versions
svc_new.fit(train_x_reduced, train_y_leaderboard)
output = svc_new.predict(test_x_reduced)
"""
"""
print 'Outputting'
open_file_object = csv.writer(open(
                              "simple" + str(datetime.now().isoformat()) +
                              ".csv", "wb"))
open_file_object.writerow(['case_id', 'Target_Leaderboard'])
i = 0
for row in entries:
    open_file_object.writerow([row, output[i].astype(np.uint8)])
    i += 1

print('Done')
print(datetime.now() - start)
# Time = 7276.782202

# Saving data
joblib.dump(clf, learning_model_path)




########### Testing ####################################

print("Making Testing Data...")
test_data = np.array(p.read_csv(filepath_or_buffer=csv_test_path, header=None, sep=',', index_col=0))[:, :]
test_label = np.ravel(np.array(p.read_csv(filepath_or_buffer=csv_test_path, header=None, sep=',', usecols=[0]))[:, :])

print("Calculating Score...")
predict = clf.predict(test_data)

from sklearn.metrics import accuracy_score
print(accuracy_score(test_label, predict))

from sklearn.metrics import classification_report
print(classification_report(test_label, predict))

from sklearn import metrics
print ( metrics.confusion_matrix(test_label, predict) )


########### Results ####################################

# SVM
Example #49
                              max_features=None)

clf5.fit(X_train, y_train)
pred5 = clf5.predict(X_test)
acc5 = accuracy_score(pred5, y_test)
cm5 = confusion_matrix(y_test, pred5)
f5 = np.sum(cm5, axis=1)
cm5 = cm5 / f5 * 100
print('Accuracy: {:.2f}%'.format(acc5 * 100))
fig, ax = plt.subplots(figsize=(8, 6), dpi=100)
ax = sns.heatmap(cm5, annot=True, ax=ax, fmt='.2f')

from sklearn.svm import NuSVC
clf6 = NuSVC()
clf6.fit(X_train, y_train)
pred6 = clf6.predict(X_test)
acc6 = accuracy_score(pred6, y_test)
cm6 = confusion_matrix(y_test, pred6)
f6 = np.sum(cm6, axis=1)
cm6 = cm6 / f6 * 100
print('Accuracy: {:.2f}%'.format(acc6 * 100))
fig, ax = plt.subplots(figsize=(8, 6), dpi=100)
ax = sns.heatmap(cm6, annot=True, ax=ax, fmt='.2f')

# saving the models (pickle.dump writes to a file; dumps would return bytes)
pickle.dump(clf, open('../data/clf.sav', 'wb'))
pickle.dump(clf2, open('../data/clf2.sav', 'wb'))
pickle.dump(clf3, open('../data/clf3.sav', 'wb'))
pickle.dump(clf4, open('../data/clf4.sav', 'wb'))
pickle.dump(clf5, open('../data/clf5.sav', 'wb'))
Example #50
clf_LogisticRegression = LogisticRegression(n_jobs=-1).fit(X_train, y_train)
predicted_LogisticRegression = clf_LogisticRegression.predict(X_test)
accuracy_LogisticRegression = np.mean(predicted_LogisticRegression == y_test)

clf_LogisticRegression_f = open("pickled_algos/clf_LogisticRegression.pickle", "wb")
pickle.dump(clf_LogisticRegression, clf_LogisticRegression_f)
clf_LogisticRegression_f.close()

print('LogisticRegression accuracy: %s' %accuracy_LogisticRegression)
print(metrics.classification_report(predicted_LogisticRegression, y_test))



#NuSVC Classifier
clf_NuSVC = NuSVC().fit(X_train, y_train)
predicted_NuSVC = clf_NuSVC.predict(X_test)
accuracy_NuSVC = np.mean(predicted_NuSVC == y_test)

clf_NuSVC_f = open("pickled_algos/clf_NuSVC.pickle", "wb")
pickle.dump(clf_NuSVC, clf_NuSVC_f)
clf_NuSVC_f.close()

print('NuSVC accuracy: %s' %accuracy_NuSVC)
print(metrics.classification_report(predicted_NuSVC, y_test))



#LinearSVC Classifier
clf_LinearSVC = LinearSVC().fit(X_train, y_train)
predicted_LinearSVC = clf_LinearSVC.predict(X_test)
accuracy_LinearSVC = np.mean(predicted_LinearSVC == y_test)
Example #51
for train_index, test_index in kf.split(XX):
    #print("TRAIN:", train_index, "TEST:", test_index)
    X_train, X_test = XX[train_index], XX[test_index]
    y_train, y_test = y[train_index], y[test_index]
    clf = NuSVC()
    clf.fit(X_train, y_train)
    y_predict = clf.predict(X_test)
    for i in range(0, len(y_predict)):  # iterate over every prediction, including the last
        if y_predict[i] == y_test[i]:
            dum[ii] = 1
            ii = ii + 1
        else:
            dum[ii] = 0
            ii = ii + 1
print("Accuracy is")
print((np.sum(dum) / len(XX)) * 100, "%")
Example #52
        if model_type == 'lsi':
            cs_vec = get_cs_vec(hyp1, hyp2, ref)
        elif model_type == 'lda':
            cs_vec = get_cs_vec(hyp1, hyp2, ref)
            #cs1 = kl_divergence(v_hyp1, v_ref)
            #cs2 = kl_divergence(v_hyp2, v_ref)
            pass
        lm_vec = get_lm_vec(hyp1, hyp2, ref)
        train_sample = []
        train_sample += cs_vec
        train_sample += lm_vec
        train_sample += m_vec
        P.append(train_sample)
    print('test samples', np.shape(np.array(P)))
    print('training classifiers...')
    for k in ['rbf', 'linear']:
        simpleclf = SVC(kernel=k, cache_size=4000)
        simpleclf.fit(np.array(X), np.array(answers[st:sp]), sample_weight=np.array(sample_weights[st:sp]))
        scores = cross_validation.cross_val_score(simpleclf, np.array(X), np.array(answers[st:sp]))
        print('SVC:', simpleclf.kernel, 'CV(3)', sum(scores) / len(scores))
        predictions = simpleclf.predict(np.array(P))
        np.savetxt('SVC-' + simpleclf.kernel + '.3.pred', predictions, fmt='%d')

        nuclf = NuSVC(kernel=k, cache_size=4000)
        nuclf.fit(np.array(X), np.array(answers[st:sp]), sample_weight=np.array(sample_weights[st:sp]))
        scores = cross_validation.cross_val_score(nuclf, np.array(X), np.array(answers[st:sp]))
        print('NuSVC:', nuclf.kernel, 'CV(3)', sum(scores) / len(scores))
        predictions = nuclf.predict(np.array(P))
        np.savetxt('NuSVC-' + nuclf.kernel + '.3.pred', predictions, fmt='%d')