target = []
split = 0.67
loadDataset('spambase.data', split, data, target)
X = data  # use all of the features loaded from the spambase file
y = target

h = .02  # step size in the mesh

# we create an instance of SVM and fit our data. We do not scale our
# data since we want to plot the support vectors
C = 1.0  # SVM regularization parameter
svc = svm.SVC().fit(X, y)
rbf_svc = svm.SVC(kernel='rbf', gamma=0.7, C=C).fit(X, y)
lin_svc = svm.LinearSVC(C=C).fit(X, y)
Nu_svc = svm.NuSVC().fit(X, y)
zero = []
one = []

for i in data:
    zero.append(i[0])
    one.append(i[1])
#print zero
#print one

# create a mesh to plot in
x_min, x_max = min(zero), max(zero)
y_min, y_max = min(one), max(one)
xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))

# title for the plots
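# The loadDataset helper called at the top of this example is not shown in the
# snippet. The definition below is only a hypothetical sketch, assuming it reads
# the comma-separated spambase file, keeps roughly a 'split' fraction of the rows,
# and fills the data/target lists in place (spambase keeps the class label in the
# last column).
import csv
import random

def loadDataset(filename, split, data, target):
    with open(filename) as f:
        for row in csv.reader(f):
            if not row:
                continue
            if random.random() < split:  # keep ~split of the rows
                data.append([float(v) for v in row[:-1]])
                target.append(float(row[-1]))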
Example #2
    linear_model.LogisticRegressionCV(),
    linear_model.PassiveAggressiveClassifier(),
    linear_model.RidgeClassifierCV(),
    linear_model.SGDClassifier(),
    linear_model.Perceptron(),

    # Naive Bayes
    naive_bayes.BernoulliNB(),
    naive_bayes.GaussianNB(),

    # Nearest Neighbor
    neighbors.KNeighborsClassifier(),

    # SVM
    svm.SVC(probability=True),
    svm.NuSVC(probability=True),
    svm.LinearSVC(),

    # Trees
    tree.DecisionTreeClassifier(),
    tree.ExtraTreeClassifier(),

    # Discriminant Analysis
    discriminant_analysis.LinearDiscriminantAnalysis(),
    discriminant_analysis.QuadraticDiscriminantAnalysis(),

    # xgboost: http://xgboost.readthedocs.io/en/latest/model.html
    XGBClassifier()
]
from sklearn import feature_selection
from sklearn import model_selection
from sklearn import svm, metrics
from sklearn.model_selection import train_test_split  # utility that automatically splits data into training and test sets
'''
# Iris classifier (machine learning)
- Overview : 150 iris records (sepal length, sepal width, petal length, petal width)
- Classes : 3 (Iris-setosa, Iris-versicolor, Iris-virginica)
- CSV file : search for iris.csv
'''
## 0. Prepare the training and test data
csv = pd.read_csv("C:/BigData/iris.csv")
data = csv.iloc[:, 0:-1]
label = csv.iloc[:, [-1]]
## split into training and test sets
train_data, test_data, train_label, test_label = \
    train_test_split(data,label, train_size =0.6)

#1. Create (select) a Classifier --> choose the machine learning algorithm
clf = svm.NuSVC(gamma="auto")  # clf is short for "classifier"

#2. Train on the data
#clf.fit([ training data ], [ labels ])
clf.fit(train_data, train_label)

#3. Check the accuracy (reliability)
results = clf.predict(test_data)
score = metrics.accuracy_score(results, test_label)
print("정답률 :", score * 100, '%')

#4. Predict a new sample of my own
result = clf.predict([[4.1, 3.3, 1.5, 0.2]])
print(result)
Example #4
        regression(light_reg.LinearSVR(random_state=RANDOM_SEED)),
        classification(
            light_clf.LinearSVC(criterion="accuracy",
                                random_state=RANDOM_SEED)),
        classification(
            light_clf.LinearSVC(criterion="auc", random_state=RANDOM_SEED)),
        classification_binary(
            light_clf.LinearSVC(criterion="accuracy",
                                random_state=RANDOM_SEED)),
        classification_binary(
            light_clf.LinearSVC(criterion="auc", random_state=RANDOM_SEED)),

        # Sklearn SVM
        regression(svm.NuSVR(kernel="rbf")),
        regression(svm.SVR(kernel="rbf")),
        classification(svm.NuSVC(kernel="rbf", **SVC_PARAMS)),
        classification(svm.SVC(kernel="rbf", **SVC_PARAMS)),
        classification_binary(svm.NuSVC(kernel="rbf", **SVC_PARAMS)),
        classification_binary(svm.SVC(kernel="linear", **SVC_PARAMS)),
        classification_binary(
            svm.SVC(kernel="poly",
                    C=1.5,
                    degree=2,
                    gamma=0.1,
                    coef0=2.0,
                    **SVC_PARAMS)),
        classification_binary(svm.SVC(kernel="rbf", **SVC_PARAMS)),
        classification_binary(svm.SVC(kernel="sigmoid", **SVC_PARAMS)),

        # Lightning SVM
        classification(
Example #5
def svm_nusvc(X, y):
    clf = svm.NuSVC()
    return clf.fit(X, y)
Example #6
    def train(self, X, T, kernel, deg, param):
        svc = svm.NuSVC(nu=param, kernel=kernel, degree=deg)
        svc.fit(X, T)

        self.model = svc
Example #7
    assert_array_almost_equal(clf.predict(X), [2] * 6)

    X_, y_ = make_classification(n_samples=200,
                                 n_features=10,
                                 weights=[0.833, 0.167],
                                 random_state=2)

    for clf in (linear_model.LogisticRegression(),
                svm.LinearSVC(random_state=0), svm.SVC()):
        clf.set_params(class_weight={0: .1, 1: 10})
        clf.fit(X_[:100], y_[:100])
        y_pred = clf.predict(X_[100:])
        assert f1_score(y_[100:], y_pred) > .3


@pytest.mark.parametrize("estimator", [svm.SVC(C=1e-2), svm.NuSVC()])
def test_svm_classifier_sided_sample_weight(estimator):
    # fit a linear SVM and check that giving more weight to opposed samples
    # in the space will flip the decision toward these samples.
    X = [[-2, 0], [-1, -1], [0, -2], [0, 2], [1, 1], [2, 0]]
    estimator.set_params(kernel='linear')

    # check that with unit weights, a sample is supposed to be predicted on
    # the boundary
    sample_weight = [1] * 6
    estimator.fit(X, Y, sample_weight=sample_weight)
    y_pred = estimator.decision_function([[-1., 1.]])
    assert y_pred == pytest.approx(0)

    # give more weights to opposed samples
    sample_weight = [10., .1, .1, .1, .1, 10]
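    # Hedged continuation (the original snippet is cut off here): refit with the
    # skewed weights and check the test point is pulled off the decision
    # boundary, mirroring the unit-weight block above.
    estimator.fit(X, Y, sample_weight=sample_weight)
    y_pred = estimator.decision_function([[-1., 1.]])
    assert y_pred != pytest.approx(0)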
Example #8
w = clf.coef_[0]
a = -w[0] / w[1]
xx = np.linspace(0, 1)
yy = a * xx - (clf.intercept_[0]) / w[1]

plt.figure(0)
plt.plot(xx, yy, 'k-')
plt.plot(x[c == 1], y[c == 1], 'ro')
plt.plot(x[c == 0], y[c == 0], 'bo')
plt.title("C=" + str(C) + " ;gamma=" + str(gamma) + " ;score: " + str(score))
#--------------------------------------------------------
#Non Linear Classifier
# fit the model
tol = 0.001
clf = svm.NuSVC(tol=tol)
X = xyc[:, :2]
Y = xyc[:, 2]
clf.fit(X, Y)
score = clf.score(X, Y)
print("Non linear score:", score)
#Z=clf.decision_function(X)
#print(Z)
#Draw points of classification----------------------------------
h = .02  # step size in the mesh
# create a mesh to plot in
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))

# Plot the decision boundary. For that, we will assign a color to each
# point in the mesh [x_min, x_max] x [y_min, y_max].
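# A hedged sketch of how this non-linear plot presumably finishes, following the
# usual scikit-learn mesh pattern (plt is assumed to be matplotlib.pyplot, as in
# the linear-classifier block above).
Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)
plt.figure(1)
plt.contourf(xx, yy, Z, alpha=0.3)
plt.scatter(X[:, 0], X[:, 1], c=Y)
plt.title("NuSVC decision regions, score: " + str(score))
plt.show()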
def parse_para_and_get_model(param_dict):

    #param_dict = json.loads(ml_opts_jstr)
    model_name = param_dict['learning_algorithm']  # linear_svm, svm, nu_svm, logistic_regression, ...

    ### parse and print parameters ###
    print "INFO: ============Learning Algorithm and Parameters============="
    print "INFO: param_dict=", param_dict
    if model_name == "linear_svm":
        ### 1: linearSVM
        C = eval(param_dict['c'])
        C = float(C)
        print "INFO: Learning Algorithm: ", model_name
        print "INFO: C = ", C
        print "INFO: ====================1: Linear SVM============="
        clf = svm.LinearSVC(C=C)

    elif model_name == "svm":
        ### 2: SVM with kernel
        C = eval(param_dict['c'])
        C = float(C)
        kernel_func = param_dict['kernel']
        gamma_val = "0.0"
        if 'gamma' in param_dict:
            gamma_val = eval(param_dict['gamma'])
            gamma_val = float(gamma_val)
        print "INFO: Learning Algorithm: ", model_name
        print "INFO: C = ", C
        print "INFO: kernel = ", kernel_func
        print "INFO: gamma = ", gamma_val
        if kernel_func == "poly":
            degree_num = eval(param_dict['degree'])
            print "degree = ", degree_num
        print "INFO: ====================2: SVM with kernel============="
        if kernel_func == "poly":
            clf = svm.SVC(C=C,
                          kernel=kernel_func,
                          gamma=gamma_val,
                          degree=degree_num)
        elif kernel_func == "rbf" or kernel_func == "sigmoid":
            clf = svm.SVC(C=C, kernel=kernel_func, gamma=gamma_val)
        else:
            clf = svm.SVC(C=C, kernel=kernel_func)

    elif model_name == "nu_svm":
        ### 3: NuSVC
        nu_val = eval(param_dict['nu'])
        nu_val = float(nu_val)
        kernel_func = param_dict['kernel']
        gamma_val = eval(param_dict['gamma'])
        gamma_val = float(gamma_val)
        print "INFO: Learning Algorithm: ", model_name
        print "INFO: nu = ", nu_val
        print "INFO: kernel = ", kernel_func
        print "INFO: gamma = ", gamma_val
        if kernel_func == "poly":
            degree_num = eval(param_dict['degree'])
            print "INFO: degree = ", degree_num
        print "INFO: ====================3: NuSVC============="
        if kernel_func == "poly":
            clf = svm.NuSVC(nu=nu_val,
                            kernel=kernel_func,
                            gamma=gamma_val,
                            degree=degree_num)
        elif kernel_func == "rbf" or kernel_func == "sigmoid":
            clf = svm.NuSVC(nu=nu_val, kernel=kernel_func, gamma=gamma_val)
        else:
            clf = svm.NuSVC(nu=nu_val, kernel=kernel_func)

    elif model_name == "logistic_regression":
        ### 4: Logistic Regression
        C = eval(param_dict['c'])
        C = float(C)
        # penalty from CV, regularization from non-CV training
        if 'regularization' in param_dict:
            regularization = param_dict['regularization']
        elif 'penalty' in param_dict:
            regularization = param_dict['penalty']
        print "INFO: Learning Algorithm: ", model_name
        print "INFO: C = ", C
        print "INFO: penalty = ", regularization
        print "INFO: ====================4: Logistic Regression============="
        clf = linear_model.LogisticRegression(C=C, penalty=regularization)

    elif model_name == "linear_svm_with_sgd":
        ### 5: linearSVM with SGD, no para as input
        print "INFO: Learning Algorithm: ", model_name
        print "INFO: ====================5: Linear SVM with SGD============="
        clf = linear_model.SGDClassifier()
    elif model_name == "passive_aggressive_classifier":
        ### 6: Passive Aggressive Classifier
        C = eval(param_dict['c'])
        C = float(C)
        print "INFO: Learning Algorithm: ", model_name
        print "INFO: C = ", C
        print "INFO: ====================6: Passive Aggressive Classifier============="
        clf = linear_model.PassiveAggressiveClassifier(C=C)
    elif model_name == "perceptron":
        ### 7: Perceptron
        print "INFO: Learning Algorithm: ", model_name
        print "INFO: ====================7: Perceptron============="
        clf = linear_model.Perceptron()
    else:
        print "INFO: Training model selection error: no valid ML model selected!"
        return (0, "none")
    return (clf, model_name)
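
# Hedged usage sketch (not part of the original source): the keys below are the
# ones the branches above read, and the values are strings because the function
# eval()s them before casting.
sample_params = {
    'learning_algorithm': 'nu_svm',
    'nu': '0.3',
    'kernel': 'rbf',
    'gamma': '0.1',
}
clf, model_name = parse_para_and_get_model(sample_params)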
Example #10
def fit_SVM(X_train, y_train, _gamma="auto"):
    from sklearn import svm
    clf = svm.NuSVC(gamma=_gamma)
    clf.fit(X_train, y_train.ravel())
    return clf
Example #11
def run(X, y, g):
    clf = svm.NuSVC(gamma=g)
    clf.fit(X, y)
    return clf
def parse_param_and_get_model(param_dict):

    #param_dict = json.loads(j_str)
    model_name = param_dict['learning_algorithm']  # linear_svm, svm, nu_svm, logistic_regression, passive_aggressive_classifier
    cv = eval(param_dict['cv'])
    mode = param_dict['mode']
    api = param_dict['api']

    print "INFO: Learning Algorithm: ", model_name
    print "INFO: CV = ", cv
    print "INFO: mode = ", mode
    print "INFO: API use: ", api
    ### parse and print parameters ###
    print "INFO: ============ Learning Algorithm and Grid Search Parameters ============="

    if model_name == "linear_svm":
        ### 1: linearSVM
        if mode == "cheap":
            param_dic = [{'C': [0.0001, 0.01, 1, 100, 10000]}]
        else:
            param_dic = [{
                'C': [0.0001, 0.001, 0.01, 0.1, 1, 10, 100, 1000, 10000]
            }]
        print "INFO: Grid Search Parameters:"
        print "INFO: C = ", param_dic[0]['C']
        print "INFO: ====================1: Linear SVM============="
        clf = svm.LinearSVC()
    elif model_name == "svm":
        ### 2: SVM with kernel
        if mode == "cheap":
            param_dic = [{
                'C': [0.01, 1, 100],
                'kernel': ['rbf', 'sigmoid'],
                'gamma': [0.0, 0.5]
            }, {
                'C': [0.01, 1, 100],
                'kernel': ['linear']
            }, {
                'C': [0.01, 1, 100],
                'kernel': ['poly'],
                'gamma': [0.0, 0.5],
                'degree': [3]
            }]
        else:
            param_dic = [{
                'C': [0.0001, 0.01, 1, 100, 10000],
                'kernel': ['rbf', 'sigmoid'],
                'gamma': [0.0, 0.5, 1]
            }, {
                'C': [0.0001, 0.01, 1, 100, 10000],
                'kernel': ['linear']
            }, {
                'C': [0.0001, 0.01, 1, 100, 10000],
                'kernel': ['poly'],
                'gamma': [0.0, 0.5],
                'degree': [2, 3]
            }]
            #param_dic = [{'C': [0.0001, 0.01, 1, 100, 10000], 'kernel':['rbf','sigmoid'], 'gamma':[0.0, 0.5, 1]}, {'C': [0.0001, 0.01, 1, 100, 10000], 'kernel':['linear']}, {'C': [0.0001, 0.01, 1, 100, 10000], 'kernel':['poly'], 'gamma':[0.0, 0.5, 1], 'degree':[2,3]}]
            #param_dic = [{'C': [0.001, 0.01, 0.1, 1, 10, 100, 1000], 'kernel':['rbf','sigmoid'], 'gamma':[0.0, 0.01, 0.1, 1, 10, 100]}, {'C': [0.001, 0.01, 0.1, 1, 10, 100, 1000], 'kernel':['linear']}, {'C': [0.001, 0.01, 0.1, 1, 10, 100, 1000], 'kernel':['poly'], 'gamma':[0.0, 0.01, 0.1, 1, 10, 100], 'degree':[2,3,4]}]
        print "INFO: Grid Search Parameters:"
        for p in range(0, len(param_dic)):
            print "INFO: ",
            for key in param_dic[p]:
                print key, ' = ', param_dic[p][key],
            print ""
        print "INFO: ====================2: SVM with kernel============="
        clf = svm.SVC()
    elif model_name == "nu_svm":
        ### 3: NuSVC
        if mode == "cheap":
            param_dic = [{
                'nu': [0.1, 0.3],
                'kernel': ['rbf', 'sigmoid'],
                'gamma': [0.0, 0.1]
            }, {
                'nu': [0.1, 0.3],
                'kernel': ['linear']
            }, {
                'nu': [0.1, 0.3],
                'kernel': ['poly'],
                'gamma': [0.0, 0.1],
                'degree': [3]
            }]
        else:
            param_dic = [{
                'nu': [0.1, 0.2, 0.3],
                'kernel': ['rbf', 'sigmoid'],
                'gamma': [0.0, 0.1, 1, 10]
            }, {
                'nu': [0.1, 0.2, 0.3],
                'kernel': ['linear']
            }, {
                'nu': [0.1, 0.2, 0.3],
                'kernel': ['poly'],
                'gamma': [0.0, 0.1, 1, 10],
                'degree': [2, 3]
            }]
            #param_dic = [{'nu': [0.1, 0.2, 0.3, 0.4], 'kernel':['rbf','sigmoid'], 'gamma':[0.0, 0.1, 1, 10]}, {'nu': [0.1, 0.2, 0.3, 0.4], 'kernel':['linear']}, {'nu': [0.1, 0.2, 0.3, 0.4], 'kernel':['poly'], 'gamma':[0.0, 0.1, 1, 10], 'degree':[2,3]}]
        print "INFO: Grid Search Parameters:"
        for p in range(0, len(param_dic)):
            print "INFO: ",
            for key in param_dic[p]:
                print key, ' = ', param_dic[p][key],
            print ""
        print "INFO: ====================3: NuSVC============="
        clf = svm.NuSVC()
    elif model_name == "logistic_regression":
        ### 4: Logistic Regression
        if mode == "cheap":
            param_dic = [{
                'C': [0.0001, 0.01, 1, 100, 10000],
                'penalty': ['l2']
            }]
        else:
            param_dic = [{
                'C': [0.0001, 0.001, 0.01, 0.1, 1, 10, 100, 1000, 10000],
                'penalty': ['l2', 'l1']
            }]
        print "INFO: Grid Search Parameters:"
        print "INFO: C= ", param_dic[0]['C']
        print "INFO: penalty= ", param_dic[0]['penalty']
        print "INFO: ====================4: Logistic Regression============="
        clf = linear_model.LogisticRegression()
    elif model_name == "passive_aggressive_classifier":
        ### 6: Passive Aggressive Classifier
        if mode == "cheap":
            param_dic = [{'C': [0.0001, 0.01, 1, 100, 10000]}]
        else:
            param_dic = [{
                'C': [0.0001, 0.001, 0.01, 0.1, 1, 10, 100, 1000, 10000]
            }]
        print "INFO: Grid Search Parameters:"
        print "INFO: C= ", param_dic[0]['C']
        print "INFO: ====================6: Passive Aggressive Classifier============="
        clf = linear_model.PassiveAggressiveClassifier()
    else:
        print "INFO: Training model selection error: no valid ML model selected!"
        return (0, "none", 0, 0, 0)
    return (clf, model_name, api, cv, param_dic)
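
# Hedged usage sketch (not part of the original source): the returned estimator
# and parameter grid are presumably handed to a grid search. GridSearchCV lives
# in sklearn.model_selection on current releases (sklearn.grid_search on the old
# versions this Python 2 code targets); param_dict, X_train and y_train are
# assumed to exist.
from sklearn.model_selection import GridSearchCV

clf, model_name, api, cv, param_dic = parse_param_and_get_model(param_dict)
searcher = GridSearchCV(clf, param_dic, cv=cv)
searcher.fit(X_train, y_train)
print("INFO: best params = %s" % searcher.best_params_)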
Example #13
    def fit(self, X, Y, class_weight=None):
        self.classes = list(np.unique(Y))
        nclasses = len(self.classes)
        total_nrows = X.shape[0]
        nfeatures = X.shape[1]

        if self.balanced_bagging:

            class_slices = []
            class_outputs = []
            for c in self.classes:
                mask = (Y == c)
                class_slices.append(X[mask, :])
                class_outputs.append(Y[mask, :])
            min_size = np.min([x.shape[0] for x in class_slices])
            class_bag_size = int(min_size * self.bag_prct)
            print "Balanced bagging, min class size =", class_bag_size
            total_bag_size = (nclasses - 1) * class_bag_size + (
                self.neutral_weight * class_bag_size)
        else:
            total_bag_size = total_nrows

        print "total_bag_size = ", total_bag_size

        if self.num_random_features == 'sqrt':
            features_per_model = int(math.ceil(math.sqrt(nfeatures)))
        elif self.num_random_features == 'log':
            features_per_model = int(math.ceil(math.log(nfeatures, 2)))
        else:
            features_per_model = int(
                math.ceil(nfeatures * self.num_random_features))
        print "Features per model:", features_per_model

        print "[Class Weights]", class_weight

        f_scores = []

        for i in xrange(self.nmodels):
            print "Training model #" + str(i)
            feature_indices = np.random.permutation(
                nfeatures)[:features_per_model]
            print "  Features:", feature_indices

            if self.balanced_bagging:
                input_list = []
                output_list = []
                for i, c in enumerate(self.classes):
                    x = class_slices[i]
                    y = class_outputs[i]
                    n = self.neutral_weight * class_bag_size if c == 0 else class_bag_size
                    row_indices = np.random.permutation(x.shape[0])[:n]
                    row_slice = x[row_indices, :]
                    input_list.append(row_slice[:, feature_indices])
                    output_list.append(y[row_indices])

                inputs = np.concatenate(input_list)
                outputs = np.concatenate(output_list)
            else:
                inputs = X[:, feature_indices]
                outputs = Y

            if self.base_classifier == 'sgd':
                print "  Input shape:", inputs.shape
                n_iter = int(np.ceil(10**6 / float(inputs.shape[0])))
                print "  Num iters: ", n_iter
                model = lin.SGDClassifier(n_iter=n_iter,
                                          shuffle=True,
                                          **self.model_keywords)
            elif self.base_classifier == 'logistic':
                model = lin.LogisticRegression(**self.model_keywords)
            elif self.base_classifier == 'nu-svm':
                model = svm.NuSVC(nu=0.1, kernel='linear')
            elif self.base_classifier == 'svm_tree':
                model = treelearn.SVM_Tree(**self.model_keywords)
            else:
                model = svm.LinearSVC(
                    **self.model_keywords)  # svm.SVC(kernel='poly', degree=2)
            model.fit(inputs, outputs, class_weight=class_weight)
            print model
            #print model.coef_
            # bug in scikits.learn keeps around sample weights after training,
            # making the serialization too bloated for network transfer
            if hasattr(model, 'sample_weight'): model.sample_weight = []

            # remember the balanced accuracy for each model
            pred = model.predict(inputs)
            #print "outputs[100:150]", outputs[100:150]
            #print "pred[100:150]", pred[100:150]

            # compute F-score for model weighting and user feedback
            actual_not_zero = (outputs != 0)
            actual_not_zero_count = np.sum(actual_not_zero)

            pred_not_zero = (pred != 0)
            pred_not_zero_count = np.sum(pred_not_zero)

            correct = (outputs == pred)
            correct_not_zero = np.sum(correct & actual_not_zero, dtype='float')

            print "   Correct NNZ:", correct_not_zero, "Actual NNZ: ", actual_not_zero_count, "Predicted NNZ:", pred_not_zero_count

            if pred_not_zero_count > 0:
                precision = correct_not_zero / float(pred_not_zero_count)
            else:
                precision = 0.0

            if actual_not_zero_count > 0:
                recall = correct_not_zero / float(actual_not_zero_count)
            else:
                recall = 0.0

            if precision > 0 and recall > 0:
                beta_squared = self.recall_importance**2
                denom = beta_squared * precision + recall
                f_score = (1 + beta_squared) * (precision * recall) / denom
            else:
                f_score = 0.0

            print "  Precision:", precision, "Recall:", recall, "F-score:", f_score

            if f_score > 0:
                self.model_features.append(feature_indices)
                f_scores.append(f_score)
                self.models.append(model)

        if len(f_scores) == 0:
            print "!!! No good classifiers kept !!!"
        else:
            f_scores = np.array(f_scores)
            sum_f_scores = np.sum(f_scores)
            if sum_f_scores == 0:
                print "!!! All classifiers are terrible  !!!"
                self.model_scores = f_scores
            else:
                self.model_scores = f_scores / sum_f_scores
                # estimate how good each feature is
                counts = np.zeros(nfeatures)
                feature_scores = np.zeros(nfeatures)

                for f, indices in zip(f_scores, self.model_features):
                    counts[indices] += 1
                    feature_scores[indices] += f
                feature_scores /= counts
                print "Average feature scores:", feature_scores
                #sorted in ascending order
                sort_indices = np.argsort(feature_scores)
                print "Best 5 features:", sort_indices[-5:]

            if self.model_weighting == 'logistic':
                X2 = self.transform_to_classifer_space(X)
                print "Training logistic regression on top of ensemble outputs..."
                self.gating_classifier = lin.LogisticRegression()
                self.gating_classifier.fit(X2, Y, class_weight=class_weight)
            else:
                self.gating_classifier = None
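
    # Hedged sketch of a matching predict() (not part of the original snippet):
    # each sub-model votes on its own feature subset, the votes are weighted by
    # the normalised F-scores computed in fit(), and the optional gating
    # classifier trained above takes precedence when present.
    def predict(self, X):
        if self.gating_classifier is not None:
            X2 = self.transform_to_classifer_space(X)
            return self.gating_classifier.predict(X2)
        votes = np.zeros((X.shape[0], len(self.classes)))
        for model, indices, score in zip(self.models, self.model_features,
                                         self.model_scores):
            pred = model.predict(X[:, indices])
            for j, c in enumerate(self.classes):
                votes[pred == c, j] += score
        return np.array(self.classes)[np.argmax(votes, axis=1)]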
Example #14
from sklearn import svm
import copy
import numpy as np
from sklearn.cross_validation import train_test_split
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import BaggingClassifier

rf = RandomForestClassifier(n_estimators=100)
dt = tree.DecisionTreeClassifier()
gnb = GaussianNB()
bnb = BernoulliNB()
mnb = MultinomialNB()
svm_cf = svm.SVC()
maxlist = list()
data = list()
nusvm = svm.NuSVC()
adaboost = AdaBoostClassifier(n_estimators=100)


def CVAndOutPutAccuracies(trainData, classData, fold_no):

    print "Length of labelled array : " + str(len(trainData))
    print "Length of labels array : " + str(len(classData))
    print "Feature length : " + str(len(trainData[0]))

    print "Max Accuracy, Mean Accuracy, Min Accuracy"
    rf_accuracy = cross_val_score(rf, trainData, classData, 'accuracy',
                                  fold_no)
    rf_f1_score = cross_val_score(rf, trainData, classData, 'f1_weighted',
                                  fold_no)
    print "Random Forest Accuracy scores :"
Example #15
with codecs.open('emotions', 'r', 'utf8') as reader:
    for line in reader:
        word, rank = line.strip().split(' ')
        emotions[word] = int(rank)

X, y = [], []
for key, val in vectors.items():
    X.append(val)
    if key in emotions:
        y.append(2)
    elif key in topics:
        y.append(1)
    else:
        y.append(0)

clf = svm.NuSVC(nu=0.005)
clf.fit(X, y)

joblib.dump(clf, 'attr_clf.pkl')

X, y = [], []
for key, val in vectors.items():
    if key in emotions:
        X.append(val)
        y.append(emotions[key])

clf = svm.NuSVC(nu=0.005)
clf.fit(X, y)

joblib.dump(clf, 'rank_clf.pkl')
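
# Hedged usage sketch (not part of the original source): the dumped classifiers
# can later be restored with joblib and applied to a new word vector; vec is an
# illustrative placeholder for a vector built the same way as those in vectors.
attr_clf = joblib.load('attr_clf.pkl')
rank_clf = joblib.load('rank_clf.pkl')
label = attr_clf.predict([vec])[0]
if label == 2:  # the word looks like an emotion term (class 2 above)
    rank = rank_clf.predict([vec])[0]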
Example #16
from sklearn.cross_validation import train_test_split
from sklearn import svm

# load data from datasets
iris = datasets.load_iris()
iris_x = iris.data
iris_y = iris.target

# divide the datasets into train sets and test sets
train_x, test_x, train_y, test_y = train_test_split(iris_x,
                                                    iris_y,
                                                    test_size=0.2)

# define SVM
svm_svc_clf = svm.SVC()
svm_nusvc_clf = svm.NuSVC()
svm_linear_clf = svm.LinearSVC()

# train
svm_svc_clf.fit(train_x, train_y)
svm_nusvc_clf.fit(train_x, train_y)
svm_linear_clf.fit(train_x, train_y)

# predict
svc_y = svm_svc_clf.predict(test_x)
nusvc_y = svm_nusvc_clf.predict(test_x)
linear_y = svm_linear_clf.predict(test_x)

# accuracy
svc_counter = 0
nusvc_counter = 0
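# The example is cut off here; a hedged sketch of the accuracy count it appears
# to be setting up (the linear_counter for the third model is an added assumption).
linear_counter = 0
for i in range(len(test_y)):
    if svc_y[i] == test_y[i]:
        svc_counter += 1
    if nusvc_y[i] == test_y[i]:
        nusvc_counter += 1
    if linear_y[i] == test_y[i]:
        linear_counter += 1

print("SVC accuracy: %.3f" % (svc_counter / float(len(test_y))))
print("NuSVC accuracy: %.3f" % (nusvc_counter / float(len(test_y))))
print("LinearSVC accuracy: %.3f" % (linear_counter / float(len(test_y))))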
Example #17
def tune_model(X, y, n_it=30, models=['xgb']):
    seed = 7
    test_size = 0.30
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=test_size,
                                                        random_state=seed)
    for model1 in models:
        if model1 == 'Logistic':
            logistic = LogisticRegression()
            distributions = {'C': [1, 2, 3, 4], 'penalty': ['l1', 'l2']}
            clf = RandomizedSearchCV(logistic,
                                     distributions,
                                     random_state=0,
                                     n_iter=n_it,
                                     cv=5)
            clf.fit(X_train, y_train)
            # print(clf.best_params_)
            # print(clf.cv_results_)
            pred = clf.predict(X_test)
            print("The best Logistic Balanced Accuracy is ",
                  balanced_accuracy_score(y_test, pred) * 100)
        elif model1 == 'xgb':
            model = XGBClassifier()
            distributions = {
                'booster': ['gbtree', 'gblinear', 'dart'],
                'eta': [0, 0.2, 0.4, 0.6, 0.8, 1],
                'max_depth': [50, 100, 150, 200, 250, 300],
                'lambda': [0, 0.2, 0.4, 0.6, 0.8, 1],
                'alpha': [0, 0.2, 0.4, 0.6, 0.8, 1],
                'grow_policy': ['depthwise', 'lossguide']
            }
            clf = RandomizedSearchCV(model,
                                     distributions,
                                     random_state=0,
                                     n_iter=n_it,
                                     cv=5)
            clf.fit(X_train, y_train)
            pred = clf.predict(X_test)
            print("The best XGBoost Balanced Accuracy is ",
                  balanced_accuracy_score(y_test, pred) * 100)
        elif model1 == 'SVM':
            model = svm.NuSVC(gamma='auto')
            distributions = {
                'kernel': ['linear', 'rbf', 'poly', 'sigmoid'],
                'degree': [4, 5, 6, 7, 8, 9, 10]
            }
            clf = RandomizedSearchCV(model,
                                     distributions,
                                     random_state=0,
                                     n_iter=n_it,
                                     cv=5)
            clf.fit(X_train, y_train)
            pred = clf.predict(X_test)
            print("The best SVM Balanced Accuracy is ",
                  balanced_accuracy_score(y_test, pred) * 100)
        elif model1 == 'RandomForest':
            model = RandomForestClassifier()
            distributions = {
                'n_estimators': [50, 100, 150, 200, 250, 300],
                'criterion': ['gini', 'entropy'],
                'min_samples_split': [2, 3, 4, 5],
                'min_samples_leaf': [2, 3, 4, 5],
                'max_features': ['auto', 'sqrt', 'log2']
            }
            clf = RandomizedSearchCV(model,
                                     distributions,
                                     random_state=0,
                                     n_iter=n_it,
                                     cv=5)
            clf.fit(X_train, y_train)
            pred = clf.predict(X_test)
            print("The best Random Forest Balanced Accuracy is ",
                  balanced_accuracy_score(y_test, pred) * 100)
        else:
            print(
                model1,
                "- Name not detected. Try using one of the models that are defined"
            )


# import sklearn.ensemble
# import sklearn.model_selection
# import sklearn.svm

# import optuna

# # FYI: Objective functions can take additional arguments
# # (https://optuna.readthedocs.io/en/stable/faq.html#objective-func-additional-args).

# def GuidedTuneModel(X,y):
#     X = x
#     y=y
#     def objective(trial):

#         classifier_name = trial.suggest_categorical("classifier", ["SVC", "RandomForest"])
#         if classifier_name == "SVC":
#             svc_c = trial.suggest_loguniform("svc_c", 1e-10, 1e10)
#             classifier_obj = sklearn.svm.SVC(C=svc_c, gamma="auto")
#         else:
#             rf_max_depth = int(trial.suggest_loguniform("rf_max_depth", 2, 32))
#             classifier_obj = sklearn.ensemble.RandomForestClassifier(
#                 max_depth=rf_max_depth, n_estimators=10
#             )

#         score = sklearn.model_selection.cross_val_score(classifier_obj, X, y, n_jobs=-1, cv=3)
#         accuracy = score.mean()
#         return accuracy
#     study = optuna.create_study(direction="maximize")
#     study.optimize(objective, n_trials=100)
#     print(study.best_trial)
Example #18
# Cross validation on the Training model, parameters can be varied according to choice
kf = KFold(trainingIds.size, n_folds=4, shuffle=True)

print("Starting CV Random Forest")
#for SVM

cnt = 0
for train_index, test_index in kf:
    TrainX1 = np.array(X[train_index])
    TrainY = np.array(trainingLabels[train_index])
    trainingIds1 = np.array(trainingIds[train_index])
    print(TrainX1.shape)
    print(TrainY.shape)
    print(trainingIds1.shape)
    clf = svm.NuSVC(nu=0.02, probability=True, cache_size=24000)
    clf.fit(TrainX1, TrainY)
    TestX = np.array(X[test_index])
    TestY = np.array(trainingLabels[test_index])
    trainingIds1 = np.array(trainingIds[test_index])
    print(TestX.shape)
    print(TestY.shape)
    print(trainingIds1.shape)
    mas = clf.score(TestX, TestY)
    print("Cross validation mean accuracy: ", mas)
    cnt = cnt + 1
    if cnt == 1:
        maxMas = mas
        model = clf
    elif mas > maxMas:
        maxMas = mas
Example #19
    if title is not None:
        plt.title(title)

    plt.show()

def add_noise(data, sigma=32.0, size=None):
    # Noise function, sigma 16.0 ~ 6% noise
    if not size:
        size = data.shape
    noise = np.random.normal(0.0, sigma, size)
    return np.clip(data+noise,0,255)


# PCA and plotting for noisy dataset
#pca2, x3, x4 = do_pca(add_noise(data),target)
# Plot visualization
#plot_embedding(x3)

# PCA and plotting for original dataset
pca1, x1, x2 = do_pca(n_train,n_test,0.8)
# Plot visualization
#plot_embedding(x1)

# SVM classification for scoring original estimator (PCA)
svm1 = svm.NuSVC(verbose=True)
svm1.fit(x2[2000:], list(labels2)[2000:])
svm_score = svm1.score(x2[:2000], list(labels2)[:2000])

print svm_score

Example #20
# -*- coding: utf-8 -*-

from sklearn import svm
from sklearn.datasets import load_iris

from sklearn_porter import Porter

iris_data = load_iris()
X, y = iris_data.data, iris_data.target
clf = svm.NuSVC(gamma=0.001, kernel='rbf', random_state=0)
clf.fit(X, y)

# Cheese!

result = Porter(language='php').port(clf)
print(result)
"""
<?php

class Tmp {

    public static function predict($atts) {

        $svs = [[4.9000000000000004, 3.0, 1.3999999999999999, 0.20000000000000001], [4.5999999999999996, 3.1000000000000001, 1.5, 0.20000000000000001], [5.4000000000000004, 3.8999999999999999, 1.7, 0.40000000000000002], [5.0, 3.3999999999999999, 1.5, 0.20000000000000001], [4.9000000000000004, 3.1000000000000001, 1.5, 0.10000000000000001], [5.4000000000000004, 3.7000000000000002, 1.5, 0.20000000000000001], [4.7999999999999998, 3.3999999999999999, 1.6000000000000001, 0.20000000000000001], [5.7000000000000002, 4.4000000000000004, 1.5, 0.40000000000000002], [5.7000000000000002, 3.7999999999999998, 1.7, 0.29999999999999999], [5.0999999999999996, 3.7999999999999998, 1.5, 0.29999999999999999], [5.4000000000000004, 3.3999999999999999, 1.7, 0.20000000000000001], [5.0999999999999996, 3.7000000000000002, 1.5, 0.40000000000000002], [5.0999999999999996, 3.2999999999999998, 1.7, 0.5], [4.7999999999999998, 3.3999999999999999, 1.8999999999999999, 0.20000000000000001], [5.0, 3.0, 1.6000000000000001, 0.20000000000000001], [5.0, 3.3999999999999999, 1.6000000000000001, 0.40000000000000002], [5.2000000000000002, 3.5, 1.5, 0.20000000000000001], [4.7000000000000002, 3.2000000000000002, 1.6000000000000001, 0.20000000000000001], [4.7999999999999998, 3.1000000000000001, 1.6000000000000001, 0.20000000000000001], [5.4000000000000004, 3.3999999999999999, 1.5, 0.40000000000000002], [4.9000000000000004, 3.1000000000000001, 1.5, 0.10000000000000001], [4.9000000000000004, 3.1000000000000001, 1.5, 0.10000000000000001], [5.0999999999999996, 3.3999999999999999, 1.5, 0.20000000000000001], [4.5, 2.2999999999999998, 1.3, 0.29999999999999999], [5.0, 3.5, 1.6000000000000001, 0.59999999999999998], [5.0999999999999996, 3.7999999999999998, 1.8999999999999999, 0.40000000000000002], [4.7999999999999998, 3.0, 1.3999999999999999, 0.29999999999999999], [5.0999999999999996, 3.7999999999999998, 1.6000000000000001, 0.20000000000000001], [5.2999999999999998, 3.7000000000000002, 1.5, 0.20000000000000001], [7.0, 3.2000000000000002, 4.7000000000000002, 1.3999999999999999], [6.4000000000000004, 3.2000000000000002, 4.5, 1.5], [6.9000000000000004, 3.1000000000000001, 4.9000000000000004, 1.5], [5.5, 2.2999999999999998, 4.0, 1.3], [6.5, 2.7999999999999998, 4.5999999999999996, 1.5], [5.7000000000000002, 2.7999999999999998, 4.5, 1.3], [6.2999999999999998, 3.2999999999999998, 4.7000000000000002, 1.6000000000000001], [4.9000000000000004, 2.3999999999999999, 3.2999999999999998, 1.0], [6.5999999999999996, 2.8999999999999999, 4.5999999999999996, 1.3], [5.2000000000000002, 2.7000000000000002, 3.8999999999999999, 1.3999999999999999], [5.0, 2.0, 3.5, 1.0], [5.9000000000000004, 3.0, 4.2000000000000002, 1.5], [6.0, 2.2000000000000002, 4.0, 1.0], [6.0999999999999996, 2.8999999999999999, 4.7000000000000002, 1.3999999999999999], [5.5999999999999996, 2.8999999999999999, 3.6000000000000001, 1.3], [6.7000000000000002, 3.1000000000000001, 4.4000000000000004, 1.3999999999999999], [5.5999999999999996, 3.0, 4.5, 1.5], [5.7999999999999998, 2.7000000000000002, 4.0999999999999996, 1.0], [6.2000000000000002, 2.2000000000000002, 4.5, 1.5], [5.5999999999999996, 2.5, 3.8999999999999999, 1.1000000000000001], [5.9000000000000004, 3.2000000000000002, 4.7999999999999998, 1.8], [6.0999999999999996, 2.7999999999999998, 4.0, 1.3], [6.2999999999999998, 2.5, 4.9000000000000004, 1.5], [6.0999999999999996, 2.7999999999999998, 4.7000000000000002, 1.2], [6.5999999999999996, 3.0, 4.4000000000000004, 1.3999999999999999], [6.7999999999999998, 2.7999999999999998, 4.7999999999999998, 
1.3999999999999999], [6.7000000000000002, 3.0, 5.0, 1.7], [6.0, 2.8999999999999999, 4.5, 1.5], [5.7000000000000002, 2.6000000000000001, 3.5, 1.0], [5.5, 2.3999999999999999, 3.7999999999999998, 1.1000000000000001], [5.5, 2.3999999999999999, 3.7000000000000002, 1.0], [5.7999999999999998, 2.7000000000000002, 3.8999999999999999, 1.2], [6.0, 2.7000000000000002, 5.0999999999999996, 1.6000000000000001], [5.4000000000000004, 3.0, 4.5, 1.5], [6.0, 3.3999999999999999, 4.5, 1.6000000000000001], [6.7000000000000002, 3.1000000000000001, 4.7000000000000002, 1.5], [6.2999999999999998, 2.2999999999999998, 4.4000000000000004, 1.3], [5.5999999999999996, 3.0, 4.0999999999999996, 1.3], [5.5, 2.5, 4.0, 1.3], [5.5, 2.6000000000000001, 4.4000000000000004, 1.2], [6.0999999999999996, 3.0, 4.5999999999999996, 1.3999999999999999], [5.7999999999999998, 2.6000000000000001, 4.0, 1.2], [5.0, 2.2999999999999998, 3.2999999999999998, 1.0], [5.5999999999999996, 2.7000000000000002, 4.2000000000000002, 1.3], [5.7000000000000002, 3.0, 4.2000000000000002, 1.2], [5.7000000000000002, 2.8999999999999999, 4.2000000000000002, 1.3], [6.2000000000000002, 2.8999999999999999, 4.2999999999999998, 1.3], [5.0999999999999996, 2.5, 3.0, 1.1000000000000001], [5.7000000000000002, 2.7999999999999998, 4.0999999999999996, 1.3], [5.7999999999999998, 2.7000000000000002, 5.0999999999999996, 1.8999999999999999], [6.2999999999999998, 2.8999999999999999, 5.5999999999999996, 1.8], [4.9000000000000004, 2.5, 4.5, 1.7], [6.5, 3.2000000000000002, 5.0999999999999996, 2.0], [6.4000000000000004, 2.7000000000000002, 5.2999999999999998, 1.8999999999999999], [5.7000000000000002, 2.5, 5.0, 2.0], [5.7999999999999998, 2.7999999999999998, 5.0999999999999996, 2.3999999999999999], [6.4000000000000004, 3.2000000000000002, 5.2999999999999998, 2.2999999999999998], [6.5, 3.0, 5.5, 1.8], [6.0, 2.2000000000000002, 5.0, 1.5], [5.5999999999999996, 2.7999999999999998, 4.9000000000000004, 2.0], [6.2999999999999998, 2.7000000000000002, 4.9000000000000004, 1.8], [6.2000000000000002, 2.7999999999999998, 4.7999999999999998, 1.8], [6.0999999999999996, 3.0, 4.9000000000000004, 1.8], [7.2000000000000002, 3.0, 5.7999999999999998, 1.6000000000000001], [6.2999999999999998, 2.7999999999999998, 5.0999999999999996, 1.5], [6.0999999999999996, 2.6000000000000001, 5.5999999999999996, 1.3999999999999999], [6.4000000000000004, 3.1000000000000001, 5.5, 1.8], [6.0, 3.0, 4.7999999999999998, 1.8], [6.9000000000000004, 3.1000000000000001, 5.4000000000000004, 2.1000000000000001], [6.9000000000000004, 3.1000000000000001, 5.0999999999999996, 2.2999999999999998], [5.7999999999999998, 2.7000000000000002, 5.0999999999999996, 1.8999999999999999], [6.7000000000000002, 3.0, 5.2000000000000002, 2.2999999999999998], [6.2999999999999998, 2.5, 5.0, 1.8999999999999999], [6.5, 3.0, 5.2000000000000002, 2.0], [6.2000000000000002, 3.3999999999999999, 5.4000000000000004, 2.2999999999999998], [5.9000000000000004, 3.0, 5.0999999999999996, 1.8]];
        $coeffs = [[4.6863813658892557, 4.6863813658892557, 4.6863813658892557, 4.6863813658892557, 4.6863813658892557, 4.6863813658892557, 4.6863813658892557, 0.0, 4.6863813658892557, 0.0, 4.6863813658892557, 4.6863813658892557, 4.6863813658892557, 4.6863813658892557, 4.6863813658892557, 4.6863813658892557, 4.6863813658892557, 4.6863813658892557, 4.6863813658892557, 4.6863813658892557, 4.6863813658892557, 4.6863813658892557, 4.6863813658892557, 4.6863813658892557, 4.6863813658892557, 4.6863813658892557, 4.6863813658892557, 0.0, 0.0, -0.0, -0.0, -0.0, -4.6863813658892557, -0.0, -0.0, -0.0, -4.6863813658892557, -0.0, -4.6863813658892557, -4.6863813658892557, -4.6863813658892557, -4.6863813658892557, -0.0, -4.6863813658892557, -0.0, -0.0, -4.6863813658892557, -0.0, -4.6863813658892557, -0.0, -4.6863813658892557, -0.0, -0.0, -0.0, -0.0, -0.0, -0.0, -4.6863813658892557, -4.6863813658892557, -4.6863813658892557, -4.6863813658892557, -0.0, -0.0, -0.0, -0.0, -0.0, -4.6863813658892557, -4.6863813658892557, -4.6863813658892557, -0.0, -4.6863813658892557, -4.6863813658892557, -4.6863813658892557, -4.6863813658892557, -4.6863813658892557, -4.6863813658892557, -4.6863813658892557, -4.6863813658892557, -2.1272220789292948, -2.1272220789292948, -2.1272220789292948, -2.1272220789292948, -2.1272220789292948, -2.1272220789292948, -2.1272220789292948, -2.1272220789292948, -2.1272220789292948, -2.1272220789292948, -2.1272220789292948, -2.1272220789292948, -2.1272220789292948, -2.1272220789292948, -0.0, -2.1272220789292948, -2.1272220789292948, -2.1272220789292948, -2.1272220789292948, -0.0, -2.1272220789292948, -2.1272220789292948, -2.1272220789292948, -2.1272220789292948, -2.1272220789292948, -2.1272220789292948, -2.1272220789292948], [0.0, 0.0, 2.1272220789292948, 2.1272220789292948, 2.1272220789292948, 2.1272220789292948, 2.1272220789292948, 2.1272220789292948, 2.1272220789292948, 2.1272220789292948, 2.1272220789292948, 2.1272220789292948, 2.1272220789292948, 2.1272220789292948, 2.1272220789292948, 2.1272220789292948, 2.1272220789292948, 2.1272220789292948, 2.1272220789292948, 2.1272220789292948, 2.1272220789292948, 2.1272220789292948, 2.1272220789292948, 0.0, 2.1272220789292948, 2.1272220789292948, 0.0, 2.1272220789292948, 2.1272220789292948, 47.529341773693893, 47.529341773693893, 47.529341773693893, 0.0, 47.529341773693893, 47.529341773693893, 47.529341773693893, 0.0, 47.529341773693893, 0.0, 0.0, 0.0, 0.0, 47.529341773693893, 0.0, 47.529341773693893, 47.529341773693893, 0.0, 47.529341773693893, 0.0, 47.529341773693893, 0.0, 47.529341773693893, 47.529341773693893, 47.529341773693893, 47.529341773693893, 47.529341773693893, 47.529341773693893, 0.0, 0.0, 0.0, 0.0, 47.529341773693893, 47.529341773693893, 47.529341773693893, 47.529341773693893, 47.529341773693893, 0.0, 0.0, 47.529341773693893, 47.529341773693893, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -47.529341773693893, -47.529341773693893, -47.529341773693893, -47.529341773693893, -47.529341773693893, -47.529341773693893, -47.529341773693893, -0.0, -47.529341773693893, -47.529341773693893, -47.529341773693893, -47.529341773693893, -47.529341773693893, -47.529341773693893, -47.529341773693893, -47.529341773693893, -47.529341773693893, -47.529341773693893, -47.529341773693893, -47.529341773693893, -47.529341773693893, -47.529341773693893, -47.529341773693893, -47.529341773693893, -47.529341773693893, -0.0, -47.529341773693893]];
        $inters = [0.10061840191760112, 0.051748160156319709, -0.084181689668018464];
        $classes = [0, 1, 2];

        // exp(-y|x-x'|^2)
        $kernels = array_fill(0, 105, 0);
        for ($i = 0; $i < 105; $i++) {
Example #21
    print "gt: ", ground_truth_labels
    return float(collections.Counter(subtract)[0]) / len(ground_truth_labels)


labels, vectors = read_csv('./kaggle/train.csv', 1000)
# test_labels, test_vectors = read_csv('./kaggle/test.csv',10)
# one_hot_labels_test = one_hot_encode(test_labels)
""" 
divide training data and validation data
"""
N_train = int(len(labels) * 0.7)
N_validation = len(labels) - N_train

vectors_train, vectors_validation, labels_train, labels_validation = train_test_split(
    vectors, labels, test_size=N_validation)
# data distribution function

#create instance of SVC
clf = svm.NuSVC(kernel='rbf', nu=0.01)

#determine the hyperplane
print "determining Hyperplane..."
clf.fit(vectors_train, labels_train)
print "determined Hyperplane"
#prediction using the hyperplane
print "predicting . . . "
pd = clf.predict(vectors_validation)

#calculate the accuracy
print "accuracy: ", accuracy(pd, labels_validation)
Example #22
        trainingLabels = num.concatenate((num.zeros(averageObjectTraining.matrix.shape[0]),\
                        1 * num.ones(variableObjectTraining.matrix.shape[0]),\
                        2 * num.ones(EBTraining.matrix.shape[0])))
        testingLabels = num.concatenate((num.zeros(averageObjectTesting.matrix.shape[0]),\
                        1 * num.ones(variableObjectTesting.matrix.shape[0]),\
                        2 * num.ones(EBTesting.matrix.shape[0])))

        #Creating classifiers
        decisionTree   = DecisionTreeClassifier(max_depth=None, min_samples_split=1, random_state=0)
        randomForest   = RandomForestClassifier(n_estimators=10, max_depth=None, min_samples_split=1, random_state=0)
        extraTrees     = ExtraTreesClassifier(n_estimators=10, max_depth=None, min_samples_split=1, random_state=0)
        SVCrbf = svm.SVC(kernel='rbf', gamma=0.7)
        SVClinear = svm.SVC(kernel='linear')
        SVCpoly = svm.SVC(kernel='poly', degree=3)
        LinearSVC = svm.LinearSVC()
        NuSVC = svm.NuSVC()

        #Applying classifiers to data
        decisionTree   = decisionTree.fit(trainingSet, trainingLabels)
        randomForest   = randomForest.fit(trainingSet, trainingLabels)
        extraTrees     = extraTrees.fit(trainingSet, trainingLabels)
        SVCrbf = SVCrbf.fit(trainingSet, trainingLabels)
        SVClinear = SVClinear.fit(trainingSet, trainingLabels)
        SVCpoly = SVCpoly.fit(trainingSet, trainingLabels)
        LinearSVC = LinearSVC.fit(trainingSet, trainingLabels)
        NuSVC = NuSVC.fit(trainingSet, trainingLabels)

        #Finding predictions 
        decisionTreePredictions = decisionTree.predict(testingSet)
        randomForestPredictions = randomForest.predict(testingSet)
        extraTreesPredictions = extraTrees.predict(testingSet)
Example #23
    def train(self, dset, enable=[0, 0, 1, 0, 1, 1], cc=False):

        mods = ['KNN', 'SVC', 'SVM', 'XGBoost', 'MLP', 'RF']  # local mods list

        def tune_param(clf, param, name):

            model = GridSearchCV(clf,
                                 param_grid=param,
                                 return_train_score=True,
                                 cv=20)
            model.fit(train_X, train_y)

            print(name + " INFO:")
            print("Best hyper paramters:", model.best_params_)
            print("Best accuracy value: ", model.best_score_)

            clf.set_params(**model.best_params_)
            clf.fit(train_X, train_y)  # actually fitting the model
            print("prediction score: ", model.score(test_X, test_y))
            print(clf)

            # plot_posterior(X, y, newX, newy, clf, name, savefile)
            # xx, yy, Z, new_p, zz = plot_decision_boundaries(X, y ,clf, h=h)
            return clf

        post = []

        if cc:
            train_X, train_y, test_X, test_y = self.Ctrain_X[
                dset], self.Ctrain_y[dset], self.Ctest_X[dset], self.Ctest_y[
                    dset]
        else:
            train_X, train_y, test_X, test_y = self.train_X[
                dset], self.train_y[dset], self.test_X[dset], self.test_y[dset]

        ##### KNN #####

        if enable[0] == 1:
            tuned_param = [{
                'n_neighbors': [3, 5, 7],
                'leaf_size': range(10, 100, 10)
            }, {
                'n_neighbors': [3, 5, 7],
                'leaf_size': range(10, 100, 10)
            }, {
                'n_neighbors': [7]
            }, {
                'n_neighbors': [3, 5, 7],
                'leaf_size': range(10, 100, 10)
            }, {
                'n_neighbors': [3, 5, 7],
                'leaf_size': range(10, 100, 10)
            }]
            temp = tune_param(KNeighborsClassifier(), tuned_param[dset],
                              mods[0])
            post.append(temp)

        ##### SVC #####

        if enable[1] == 1:
            tuned_param = [{
                'gamma': ['auto'],
                'probability': [True]
            }, {
                'gamma': ['auto'],
                'probability': [True]
            }, {
                'C': np.linspace(1, 10, 10),
                'probability': [True]
            }, {
                'gamma': ['auto'],
                'probability': [True]
            }, {
                'gamma': ['auto'],
                'probability': [True]
            }]
            temp = tune_param(svm.SVC(), tuned_param[dset], mods[1])
            post.append(temp)

        ##### nuSVC #####

        if enable[2] == 1:
            tuned_param = [{
                'gamma': ['auto'],
                'probability': [True]
            }, {
                'gamma': ['auto'],
                'probability': [True]
            }, {
                'probability': [True]
            }, {
                'gamma': ['auto'],
                'probability': [True]
            }, {
                'gamma': ['auto'],
                'probability': [True]
            }]
            temp = tune_param(svm.NuSVC(), tuned_param[dset], mods[2])
            post.append(temp)

        ##### xgbooster #####

        if enable[3] == 1:
            tuned_param = [{
                'n_jobs': [-1],
                'learning_rate': np.linspace(0, 1, 20),
                'n_estimators': [64, 128, 256],
                'gamma': [0],
                'objective': ['binary:logistic']
            }, {
                'n_jobs': [-1],
                'learning_rate': np.linspace(0, 1, 20),
                'n_estimators': [64, 128, 256],
                'gamma': [0],
                'objective': ['binary:logistic']
            }, {
                'n_jobs': [-1],
                'learning_rate': np.linspace(0, 1, 20),
                'n_estimators': [64, 128, 256],
                'gamma': [0]
            }, {
                'n_jobs': [-1],
                'learning_rate': np.linspace(0, 1, 20),
                'n_estimators': [64, 128, 256],
                'gamma': [0],
                'objective': ['binary:logistic']
            }, {
                'n_jobs': [-1],
                'learning_rate': np.linspace(0, 1, 20),
                'n_estimators': [64, 128, 256],
                'gamma': [0],
                'objective': ['binary:logistic']
            }]
            temp = tune_param(xgb.XGBClassifier(objective='binary:logistic'),
                              tuned_param[dset], mods[3])
            post.append(temp)

        #### mlp #####

        if enable[4] == 1:
            tuned_param = [{
                'alpha': [0],
                'max_iter': [7000],
                'hidden_layer_sizes': [100],
                'learning_rate_init': [0.0001]
            }, {
                'alpha': [0],
                'max_iter': [7000],
                'hidden_layer_sizes': [100],
                'learning_rate_init': [0.0001]
            }, {
                'alpha': [0],
                'max_iter': [10000],
                'activation': ['logistic', 'relu'],
                'learning_rate_init': [0.0001],
                'solver': ['lbfgs']
            }, {
                'alpha': [0],
                'max_iter': [7000],
                'hidden_layer_sizes': [100],
                'learning_rate_init': [0.0001]
            }, {
                'alpha': [0],
                'max_iter': [7000],
                'hidden_layer_sizes': [100],
                'learning_rate_init': [0.0001]
            }]
            temp = tune_param(MLPClassifier(), tuned_param[dset], mods[4])
            post.append(temp)

        #### RF #####

        if enable[5] == 1:
            tuned_param = [{
                'max_depth': [10],
                'n_estimators': [128]
            }, {
                'max_depth': [10],
                'n_estimators': [128]
            }, {
                'max_depth': [10],
                'n_estimators': [128]
            }, {
                'max_depth': [10],
                'n_estimators': [128]
            }, {
                'max_depth': [10],
                'n_estimators': [128]
            }]
            temp = tune_param(RandomForestClassifier(n_jobs=-1),
                              tuned_param[dset], mods[5])
            post.append(temp)

        qda = QuadraticDiscriminantAnalysis()
        qda.fit(train_X, train_y)
        post.append(qda)

        return post
Example #24
    min_val, max_val = np.inf, -np.inf
    train_jitter, min_val, max_val = norm(pd.read_csv(filepath +
                                                      train_jitter).values,
                                          train=True,
                                          min_val=min_val,
                                          max_val=max_val)
    train_label = np.load(filepath + train_label)
    dev_jitter, dev_label = norm(pd.read_csv(filepath + dev_jitter).values,
                                 train=False,
                                 min_val=min_val,
                                 max_val=max_val), np.load(filepath +
                                                           dev_label)
    test_jitter, test_label = norm(pd.read_csv(filepath + test_jitter).values,
                                   train=False,
                                   min_val=min_val,
                                   max_val=max_val), np.load(filepath +
                                                             test_label)

    clf = svm.NuSVC(gamma='auto')
    clf.fit(train_jitter, train_label)

    # predict
    train_pred_labels = clf.predict(train_jitter)
    dev_pred_labels = clf.predict(dev_jitter)
    test_pred_labels = clf.predict(test_jitter)

    print(metrics(train_label, train_pred_labels))
    print(metrics(dev_label, dev_pred_labels))
    print(metrics(test_label, test_pred_labels))
Example #25
    ec.get_feature_names(),
    titanic.select_dtypes(exclude="object").drop("Y", axis=1).columns.tolist()
])
Y = titanic.Y
X_new, y_new = ro.fit_resample(X, Y)
X_train, X_test, y_train, y_test = train_test_split(X_new,
                                                    y_new,
                                                    random_state=16)

# %%
# Model
clf = GradientBoostingClassifier(n_estimators=10000, random_state=16)
clf.fit(X_train, y_train)
clf.score(X_test, y_test)

clf_svm = svm.NuSVC(gamma='auto', random_state=16)
clf_svm.fit(X_train, y_train)

clf_MLP = MLPClassifier(solver='lbfgs',
                        hidden_layer_sizes=(2, 13),
                        max_iter=10000,
                        alpha=1e-5,
                        random_state=16)
clf_MLP.fit(X_train, y_train)

clf_forest = RandomForestClassifier(n_estimators=10000, random_state=16)
clf_forest.fit(X_train, y_train)

# %%
mlp_y_pred = clf_MLP.predict(X_test)
svm_y_pred = clf_svm.predict(X_test)
Example #26
model = svm.SVC(kernel='linear', C=0.4)
model = model.fit(X_train, y_train)

score_report(X_test, y_test)

model = svm.SVC(kernel='rbf', gamma=1.0, C=0.13)
model = model.fit(X_train, y_train)

score_report(X_test, y_test)

model = svm.SVC(kernel='poly', degree=2, C=0.05)
model = model.fit(X_train, y_train)

score_report(X_test, y_test)

model = svm.NuSVC(probability=True)
model = model.fit(X_train, y_train)

score_report(X_test, y_test)

from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

model = LinearDiscriminantAnalysis()
model = model.fit(X_train, y_train)

score_report(X_test, y_test)

from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

model = QuadraticDiscriminantAnalysis()
model = model.fit(X_train, y_train)
Example #27
train_X1 = data_X[0:50, :]
train_X2 = data_X[81:, :]
train_y1 = data_y[0:50, :]
train_y2 = data_y[81:, :]
X_train = np.concatenate((train_X1, train_X2), axis=0)
y_train = np.concatenate((train_y1, train_y2), axis=0)
y_train = y_train.reshape(-1)

X_test = data_X[50:80, :]
y_test = data_y[50:80, :]
y_test = y_test.reshape(-1)
#print (X_train.shape,y_train.shape)
#print (X_test.shape,y_test.shape)

clf = svm.NuSVC(nu=0.2)
clf.fit(X_train, y_train)
score_list.append(clf.score(X_test, y_test))
clf = svm.SVC(C=1, kernel='sigmoid', degree=2, gamma=100)
clf.fit(X_train, y_train)
score_list.append(clf.score(X_test, y_test))
clf = LogisticRegression(C=1, penalty='l1')
clf.fit(X_train, y_train)
score_list.append(clf.score(X_test, y_test))
clf = GradientBoostingClassifier()
clf.fit(X_train, y_train)
score_list.append(clf.score(X_test, y_test))
clf = tree.DecisionTreeClassifier()
clf.fit(X_train, y_train)
score_list.append(clf.score(X_test, y_test))
clf = AdaBoostClassifier(n_estimators=100)
Example #28
    def init_svm(self) -> None:
        all_models = [svm.NuSVC(probability=True), svm.SVC(probability=True)]
        self.models.extend(all_models)
        models = ["nu", "svc"]
        for mod in models:
            self.model_keys[mod] = "svm"
Example #29
Perform binary classification using non-linear SVC
with RBF kernel. The target to predict is a XOR of the
inputs.

"""
print __doc__

import numpy as np
import pylab as pl
from sklearn import svm

xx, yy = np.meshgrid(np.linspace(-5, 5, 500), np.linspace(-5, 5, 500))
np.random.seed(0)
X = np.random.randn(300, 2)
Y = np.logical_xor(X[:, 0] > 0, X[:, 1] > 0)

# fit the model
clf = svm.NuSVC()
clf.fit(X, Y)

# plot the line, the points, and the nearest vectors to the plane
Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)

pl.set_cmap(pl.cm.Paired)
pl.pcolormesh(xx, yy, Z)
pl.scatter(X[:, 0], X[:, 1], c=Y)

pl.axis('tight')
pl.show()
import pylab as pl
from sklearn import svm, datasets

# import some data to play with
iris = datasets.load_iris()
X = iris.data[:, :2]  # we only take the first two features. We could
# avoid this ugly slicing by using a two-dim dataset
Y = iris.target

h = .02  # step size in the mesh

# we create an instance of SVM and fit our data. We do not scale our
# data since we want to plot the support vectors
svc = svm.SVC(kernel='linear').fit(X, Y)
rbf_svc = svm.SVC(kernel='poly').fit(X, Y)
nu_svc = svm.NuSVC(kernel='linear').fit(X, Y)
lin_svc = svm.LinearSVC().fit(X, Y)

# create a mesh to plot in
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))

# title for the plots
titles = [
    'SVC with linear kernel', 'SVC with polynomial (degree 3) kernel',
    'NuSVC with linear kernel', 'LinearSVC (linear kernel)'
]

pl.set_cmap(pl.get_cmap('jet'))