示例#1
0
def VisualizeBoundary(X, y, xlim, ylim, model, gamma, dataSetExample):
    """Plot the data and overlay the RBF-SVM decision boundary as a contour.

    A 100 x 100 grid spanning the min/max of the first two columns of ``X``
    is classified column by column with ``libsvm.predict`` and the resulting
    label surface is drawn with ``plot.contour``.

    Args:
        X: 2-D array; columns 0 and 1 define the plotted plane.
        y: Labels, forwarded unchanged to ``pd.PlotData``.
        xlim, ylim: Axis limits, forwarded unchanged to ``pd.PlotData``.
        model: Indexable result of a fit; elements 0..6 are passed as
            ``support``, ``SV``, ``nSV``, ``sv_coef``, ``intercept``,
            ``probA`` and ``probB``.
        gamma: RBF kernel coefficient used for prediction.
        dataSetExample: Integer shown in the plot title.
    """
    pd.PlotData(X, y, xlim, ylim, showplot=False)

    x1plot = np.linspace(np.amin(X[:, 0]), np.amax(X[:, 0]), 100)
    x2plot = np.linspace(np.amin(X[:, 1]), np.amax(X[:, 1]), 100)

    X1, X2 = np.meshgrid(x1plot, x2plot)

    vals = np.zeros_like(X1)
    # The loop fills one grid *column* per iteration, so it must run over the
    # number of columns (shape[1]); using the row count only worked because
    # the grid happens to be square.
    for i in range(X1.shape[1]):
        this_X = np.require(np.vstack((X1[:, i], X2[:, i])).T,
                            requirements='C')

        #http://scikit-learn.org/stable/modules/generated/sklearn.svm.libsvm.fit.html
        #http://scikit-learn.org/stable/modules/generated/sklearn.svm.libsvm.predict.html
        vals[:, i] = libsvm.predict(this_X,
                                    support=model[0],
                                    SV=model[1],
                                    nSV=model[2],
                                    sv_coef=model[3],
                                    intercept=model[4],
                                    probA=model[5],
                                    probB=model[6],
                                    kernel='rbf',
                                    gamma=gamma)

    plot.contour(X1, X2, vals, colors='red')
    plot.title('SVM (Gaussian Kernel) Decision Boundary (Example Dataset %d)' %
               (dataSetExample))
    plot.show()
 def predict(self, X):
     """Run libsvm prediction on the string-kernel Gram matrix of ``X``.

     The Gram matrix is ``self.string_kernel(X, self.__X)`` converted to a
     C-ordered float64 array.  If ``self.__X`` is falsy a message is
     printed and the process exits with status 3.
     """
     svm_type = LIBSVM_IMPL.index(self.impl)

     # Guard: nothing stored from training, so there is nothing to compare to.
     if not self.__X:
         print ("You should train the model first!!!")
         sys.exit(3)

     gram = np.asarray(
         self.string_kernel(X, self.__X), dtype=np.float64, order="C"
     )
     return libsvm.predict(
         gram,
         self.support_,
         self.support_vectors_,
         self.n_support_,
         self.dual_coef_,
         self._intercept_,
         self._label,
         self.probA_,
         self.probB_,
         svm_type=svm_type,
         kernel=self.kernel,
         C=self.C,
         nu=self.nu,
         probability=self.probability,
         degree=self.degree,
         shrinking=self.shrinking,
         tol=self.tol,
         cache_size=self.cache_size,
         coef0=self.coef0,
         gamma=self._gamma,
         epsilon=self.epsilon,
     )
示例#3
0
    def _dense_predict(self, X):
        """Predict labels for dense input via ``libsvm.predict``.

        When ``self.kernel`` is callable, the kernel is evaluated between
        ``X`` and the stored training data, the result is densified if it is
        sparse, and libsvm is invoked with ``kernel='precomputed'`` on that
        matrix.  Otherwise ``X`` is passed through with ``self.kernel``.

        Raises:
            ValueError: if the kernel matrix produced by a callable kernel
                does not have ``self.shape_fit_[0]`` columns.
        """
        kernel = self.kernel
        if callable(self.kernel):
            # Keep the Gram matrix in its own name: previously it was
            # overwritten by the string 'precomputed' before being converted
            # to an array, so the conversion always failed.
            gram = self.kernel(X, self.__Xfit)
            if sp.issparse(gram):
                gram = gram.toarray()
            X = np.asarray(gram, dtype=np.float64, order='C')

            # The column count of the Gram matrix (not of the raw features)
            # is what must match the number of training samples.
            if X.shape[1] != self.shape_fit_[0]:
                raise ValueError("X.shape[1] = %d should be equal to %d, "
                                 "the number of samples at training time" %
                                 (X.shape[1], self.shape_fit_[0]))
            kernel = 'precomputed'

        if X.ndim == 1:
            X = check_array(X, order='C', accept_large_sparse=False)

        svm_type = 0  # for svc

        return libsvm.predict(
            X, self.support_, self.support_vectors_, self.n_support_,
            self._dual_coef_, self._intercept_,
            self.probA_, self.probB_, svm_type=svm_type, kernel=kernel,
            degree=self.degree, coef0=self.coef0, gamma=self._gamma,
            cache_size=self.cache_size)
示例#4
0
 def modelEvaluate(self, x_test, y_test, path):
     """Predict on ``x_test``, then print and log classification metrics.

     Prints the prediction wall-clock time followed by the crosstab,
     accuracy, and precision/recall for labels 0 and 1; the metric lines
     (not the timing line) are also appended to ``path + 'log.txt'``.

     Args:
         x_test: pandas object whose ``.values`` is the feature matrix.
         y_test: True labels.
         path: Prefix concatenated directly with ``'log.txt'``.
     """
     start = datetime.datetime.now()
     # ``.values`` replaces ``as_matrix()``, which newer pandas no longer has.
     pred_y = libsvm.predict(
         x_test.values.copy(order='C').astype(np.float64),
         *(self.model))
     end = datetime.datetime.now()
     # Single-argument print(...) is valid on both Python 2 and Python 3.
     print(self.name + ' predicte time is ' + str(end - start))

     # Compute each metric once and reuse the text for console and log.
     report = [
         'crosstab:{0}'.format(pd.crosstab(y_test, pred_y)),
         'accuracy_score:{0}'.format(accuracy_score(y_test, pred_y)),
         '0precision_score:{0}'.format(
             precision_score(y_test, pred_y, pos_label=0)),
         '0recall_score:{0}'.format(
             recall_score(y_test, pred_y, pos_label=0)),
         '1precision_score:{0}'.format(
             precision_score(y_test, pred_y, pos_label=1)),
         '1recall_score:{0}'.format(
             recall_score(y_test, pred_y, pos_label=1)),
     ]
     for entry in report:
         print(entry)

     # The context manager closes the log even if a write fails.
     with open(path + 'log.txt', 'a') as fp:
         for entry in report:
             fp.write(entry + '\n')
示例#5
0
 def predict(self, X):
     """Predict via libsvm from the string-kernel Gram matrix of ``X``.

     Exits the process with status 3 (after printing a message) when
     ``self.__X`` is falsy; otherwise returns whatever ``libsvm.predict``
     returns for the float64, C-ordered Gram matrix.
     """
     svm_type = LIBSVM_IMPL.index(self.impl)
     if not self.__X:
         print('You should train the model first!!!')
         sys.exit(3)

     gram = self.string_kernel(X, self.__X)
     gram = np.asarray(gram, dtype=np.float64, order='C')

     # Model state handed to libsvm positionally, in the order it expects.
     positional = (
         gram,
         self.support_,
         self.support_vectors_,
         self.n_support_,
         self.dual_coef_,
         self._intercept_,
         self._label,
         self.probA_,
         self.probB_,
     )
     # Solver/kernel settings handed to libsvm by keyword.
     options = dict(
         svm_type=svm_type,
         kernel=self.kernel,
         C=self.C,
         nu=self.nu,
         probability=self.probability,
         degree=self.degree,
         shrinking=self.shrinking,
         tol=self.tol,
         cache_size=self.cache_size,
         coef0=self.coef0,
         gamma=self._gamma,
         epsilon=self.epsilon,
     )
     return libsvm.predict(*positional, **options)
示例#6
0
def dataset3_params(X, y, Xval, yval):
    """Grid-search ``C`` and ``sigma`` for an RBF SVM on a validation set.

    Every (C, sigma) pair from the candidate list is fitted on ``(X, y)``
    with ``gamma = 1 / (2 * sigma**2)`` and scored by the fraction of
    ``Xval`` predictions that differ from ``yval``.  The first pair with
    the strictly lowest error is returned as ``(best_C, best_sigma)``.
    """
    candidates = [0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30]
    # C varies slowest, sigma fastest, matching a C-outer / sigma-inner scan.
    grid = [(c_val, s_val) for c_val in candidates for s_val in candidates]

    winner = grid[0]
    lowest = 1000.0
    for c_val, s_val in grid:
        rbf_gamma = 1.0 / (2.0 * s_val ** 2)
        fitted = libsvm.fit(X, y, kernel='rbf', C=c_val, gamma=rbf_gamma)
        guessed = libsvm.predict(
            Xval,
            support=fitted[0],
            SV=fitted[1],
            nSV=fitted[2],
            sv_coef=fitted[3],
            intercept=fitted[4],
            label=fitted[5],
            probA=fitted[6],
            probB=fitted[7],
            kernel='rbf',
            gamma=rbf_gamma
        )
        miss_rate = np.mean(guessed != yval)
        if miss_rate < lowest:
            winner = (c_val, s_val)
            lowest = miss_rate
    return winner
示例#7
0
def dataset3_params(X, y, Xval, yval):
    """Pick the (C, sigma) pair with the lowest validation error.

    Trains an RBF-kernel model for every combination of the eight
    candidate values of ``C`` and ``sigma`` and returns the first
    combination achieving the strictly smallest mean misclassification
    on ``(Xval, yval)``.
    """
    c_choices = [0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30]
    sigma_choices = [0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30]

    def validation_error(c_val, sigma_val):
        # Train on (X, y), then measure the share of wrong labels on Xval.
        g = 1.0 / (2.0 * sigma_val**2)
        trained = libsvm.fit(X, y, kernel='rbf', C=c_val, gamma=g)
        labels = libsvm.predict(Xval,
                                support=trained[0],
                                SV=trained[1],
                                nSV=trained[2],
                                sv_coef=trained[3],
                                intercept=trained[4],
                                label=trained[5],
                                probA=trained[6],
                                probB=trained[7],
                                kernel='rbf',
                                gamma=g)
        return np.mean(labels != yval)

    chosen_c, chosen_sigma = c_choices[0], sigma_choices[0]
    smallest = 1000.0
    for c_val in c_choices:
        for sigma_val in sigma_choices:
            current = validation_error(c_val, sigma_val)
            if current < smallest:
                chosen_c, chosen_sigma, smallest = c_val, sigma_val, current
    return (chosen_c, chosen_sigma)
示例#8
0
def main():
    """Load the data set, train the L-layer model and print predictions.

    Each image array is reshaped to (n_examples, -1), transposed, and
    divided by 255 before training; the test predictions are printed
    first, then the training predictions.
    """
    train_x_orig, train_y, test_x_orig, test_y, classes = load_dataset()

    # Flatten everything after the first axis, put examples in columns,
    # then scale by 255.
    n_train = train_x_orig.shape[0]
    n_test = test_x_orig.shape[0]
    train_x = train_x_orig.reshape(n_train, -1).T / 255.
    test_x = test_x_orig.reshape(n_test, -1).T / 255.

    layers_dims = [12288, 20, 7, 5, 1]
    parameters = L_layer_model(
        train_x, train_y, layers_dims, num_iterations=1000, print_cost=True
    )

    predictions_train = predict(train_x, train_y, parameters)
    pred_test = predict(test_x, test_y, parameters)
    for result in (pred_test, predictions_train):
        print(result)
def classify(line):
    """Classify one line of text and return its most probable class.

    The line is converted by ``get_vsm.get_vsm`` into (feature id, value)
    pairs.  Returns ``-1`` when that list is empty; otherwise returns
    ``get_max_prob_classes`` applied to the first row of probability
    values produced by ``predict``.
    """
    word_id_list = get_vsm.get_vsm(line)
    if len(word_id_list) == 0:
        return -1

    # Temporary dict: key is the feature id, value is the feature value.
    feature_map = {entry[0]: entry[1] for entry in word_id_list}

    labels = []
    samples = [feature_map]
    # liblinear Python interface
    p_labs, p_acc, p_vals = predict(labels, samples, model_, '-b 1')
    print(p_labs, p_vals)
    return get_max_prob_classes(p_vals[0])
示例#10
0
def visualize_boundary(X, y, model, gamma):
    """Plot the data and draw the RBF-SVM decision boundary as a contour.

    A 100 x 100 grid over the min/max of the first two columns of ``X`` is
    classified one grid column at a time with ``libsvm.predict``; the
    level-0.0 contour of the predicted surface is then drawn and shown.

    Args:
        X: 2-D array; columns 0 and 1 define the plotted plane.
        y: Labels, forwarded to ``plot_data``.
        model: Indexable fit result; elements 0..7 are passed as
            ``support``, ``SV``, ``nSV``, ``sv_coef``, ``intercept``,
            ``label``, ``probA`` and ``probB``.
        gamma: RBF kernel coefficient used for prediction.
    """
    plot_data(X, y, show=False)
    x1plot = np.linspace(np.amin(X[:, 0]), np.amax(X[:, 0]), 100)
    x2plot = np.linspace(np.amin(X[:, 1]), np.amax(X[:, 1]), 100)
    X1, X2 = np.meshgrid(x1plot, x2plot)
    predictions = np.zeros_like(X1)
    for i in range(X1.shape[1]):
        currentX = np.require(np.vstack((X1[:, i], X2[:, i])).T,
                              requirements='C_CONTIGUOUS')
        predictions[:, i] = libsvm.predict(currentX,
                                           support=model[0],
                                           SV=model[1],
                                           nSV=model[2],
                                           sv_coef=model[3],
                                           intercept=model[4],
                                           label=model[5],
                                           probA=model[6],
                                           probB=model[7],
                                           kernel='rbf',
                                           gamma=gamma)
    # A single level is given as a one-element list.  The duplicated
    # [0.0, 0.0] form is a MATLAB idiom that matplotlib rejects because
    # contour levels must be strictly increasing.
    plot.contour(X1, X2, predictions, [0.0])
    plot.show()
示例#11
0
def DataSet3Params(X, y, Xval, yval):
    """Grid-search ``C`` and ``sigma`` for an RBF SVM on a validation set.

    Each of the 8 x 8 candidate pairs is fitted on ``(X, y)`` with
    ``gamma = 1 / (2 * sigma**2)`` and scored by the mean of
    ``predictions != yval`` on ``Xval``.

    Returns:
        ``(C, sigma)`` of the first pair with the strictly lowest error.
        If no pair ever improves on the initial threshold (for example
        when the error is NaN), the first candidate of each list is
        returned instead of raising ``UnboundLocalError``.
    """
    C_candidates = np.array([0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30])
    sigma_candidates = np.array([0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30])

    # Start from the first candidates so the result is always defined.
    bestC = C_candidates[0]
    bestSigma = sigma_candidates[0]
    previousError = 2000

    for currentC in C_candidates:
        for currentSigma in sigma_candidates:
            gamma = 1 / (2 * (currentSigma**2))

            model = libsvm.fit(X, y, kernel='rbf', C=currentC,
                               gamma=gamma)  # Train

            predictions = libsvm.predict(Xval,
                                         support=model[0],
                                         SV=model[1],
                                         nSV=model[2],
                                         sv_coef=model[3],
                                         intercept=model[4],
                                         probA=model[5],
                                         probB=model[6],
                                         kernel='rbf',
                                         gamma=gamma)

            currentError = np.mean(predictions != yval)

            if currentError < previousError:
                bestC = currentC
                bestSigma = currentSigma
                previousError = currentError

    return bestC, bestSigma
示例#12
0
def visualize_boundary(X, y, model, gamma):
    """Draw the data set together with the RBF-SVM decision boundary.

    Builds a 100 x 100 mesh across the range of the first two columns of
    ``X``, predicts a label for every mesh point (one mesh column per
    ``libsvm.predict`` call) and plots the level-0.0 contour.

    Args:
        X: 2-D array; columns 0 and 1 define the plotted plane.
        y: Labels, forwarded to ``plot_data``.
        model: Indexable fit result; elements 0..7 are passed as
            ``support``, ``SV``, ``nSV``, ``sv_coef``, ``intercept``,
            ``label``, ``probA`` and ``probB``.
        gamma: RBF kernel coefficient used for prediction.
    """
    plot_data(X, y, show=False)
    x1plot = np.linspace(np.amin(X[:, 0]), np.amax(X[:, 0]), 100)
    x2plot = np.linspace(np.amin(X[:, 1]), np.amax(X[:, 1]), 100)
    X1, X2 = np.meshgrid(x1plot, x2plot)
    predictions = np.zeros_like(X1)
    for i in range(X1.shape[1]):
        currentX = np.require(np.vstack((X1[:, i], X2[:, i])).T, requirements='C_CONTIGUOUS')
        predictions[:, i] = libsvm.predict(
            currentX,
            support=model[0],
            SV=model[1],
            nSV=model[2],
            sv_coef=model[3],
            intercept=model[4],
            label=model[5],
            probA=model[6],
            probB=model[7],
            kernel='rbf',
            gamma=gamma
        )
    # matplotlib requires strictly increasing contour levels, so a single
    # level is passed as [0.0] rather than the MATLAB-style [0.0, 0.0].
    plot.contour(X1, X2, predictions, [0.0])
    plot.show()
示例#13
0
 def decision_function(self, X):
     """Return the result of calling ``predict`` on ``X``.

     NOTE(review): this calls a bare ``predict`` name, not
     ``self.predict`` -- confirm that a module-level ``predict`` is
     really what is intended here.
     """
     outcome = predict(X)
     return outcome
 # Show the word indices and summarise the feature vector derived from them.
 print('Word indices:\n%s' % word_indices)
 features = email_features(word_indices, vocabulary)
 print('Length of feature vector: %d' % len(features))
 print('Number of non-zero entries: %d' % sum(features > 0))
 # Train an SVM for spam classification.
 # Load the training .mat file; X is coerced to C-contiguous float64 and y
 # is flattened and coerced to float64 before being handed to libsvm.
 data = loadmat('../../machine-learning-ex6/ex6/spamTrain.mat')
 X = np.require(data['X'], dtype=np.float64, requirements='C_CONTIGUOUS')
 y = np.require(data['y'].flatten(), dtype=np.float64)
 C = 0.1
 # Fit with a linear kernel; the result is indexed below as model[0..7].
 model = libsvm.fit(X, y, kernel='linear', C=C)
 # Predict on the training data itself to measure training accuracy.
 predictions = libsvm.predict(
     X,
     support=model[0],
     SV=model[1],
     nSV=model[2],
     sv_coef=model[3],
     intercept=model[4],
     label=model[5],
     probA=model[6],
     probB=model[7],
     kernel='linear',
 )
 # Percentage of predictions that equal the training labels.
 accuracy = 100 * np.mean(predictions == y)
 print('Training set accuracy: %0.2f %%' % accuracy)
 # Load the test set with the same dtype/layout coercion as the training set.
 # Note that ``data`` is rebound to the test file's contents here.
 data = loadmat('../../machine-learning-ex6/ex6/spamTest.mat')
 Xtest = np.require(data['Xtest'], dtype=np.float64, requirements='C_CONTIGUOUS')
 ytest = np.require(data['ytest'].flatten(), dtype=np.float64)
 print('Evaluating the trained Linear SVM on a test set ...')
 predictions = libsvm.predict(
     Xtest,
     support=model[0],
# Every row of the training matrix X and of the test matrix `data` is
# normalised in place.
for j in range(len(X)):
    X[j] = normaliseData(X[j])
for j in range(len(data)):
    data[j] = normaliseData(data[j])


# The SVM is trained with the low-level libsvm binding, using a polynomial
# kernel of degree 4; every hyper-parameter is spelled out explicitly.
[support, sv, nsv, coeff, intercept, proba, probb, fit_status] = libsvm.fit(X, Y, svm_type=0, kernel='poly', degree=4,
        gamma=0.093, coef0=0, tol=0.001, C=1, nu=0.5, max_iter=-1, random_seed=0)

# Persist everything except fit_status, then read it back for prediction.
m = [support, sv, nsv, coeff, intercept, proba, probb]
save_model('model.pkl',m)

[support_, sv_, nsv_, coeff_, intercept_, proba_, probb_] = load_model('model.pkl')
# Predict on the test data with the reloaded model and the same kernel settings.
dec_values = libsvm.predict(data, support_, sv_, nsv_, coeff_, intercept_, proba_, probb_, svm_type=0,kernel='poly', degree =4,
                            gamma=0.093, coef0=0)

# Write one prediction per row to result.csv.  The int() below performs the
# integer conversion; the earlier bare `dec_values.astype(int)` discarded
# its result and has been dropped.
with open('result.csv', 'w') as csvfile:
    spamwriter = csv.writer(csvfile)
    for value in dec_values:
        spamwriter.writerow([int(value)])

        Host ID: 90489ad33ff3 74e6e22945bf         Release: R2015b         Login Name: root
        [ 58 139  66  61  76  90  43  96 164 143]
def fitSLR(x, y):
    """Fit a simple linear regression ``y = b0 + b1 * x`` by least squares.

    The slope is ``sum((x - mean(x)) * (y - mean(y))) / sum((x - mean(x))**2)``
    and the intercept is ``mean(y) - b1 * mean(x)``.  The numerator and
    denominator of the slope are printed as a diagnostic.

    Args:
        x: Sequence of predictor values.
        y: Sequence of response values, same length as ``x``.

    Returns:
        Tuple ``(b0, b1)`` of intercept and slope.

    Raises:
        ValueError: if the inputs are empty, differ in length, or all
            ``x`` values are identical (the slope is then undefined).
    """
    xs = np.asarray(x, dtype=float)
    ys = np.asarray(y, dtype=float)
    if xs.size == 0 or xs.shape != ys.shape:
        raise ValueError("x and y must be non-empty and of equal length")

    # The means are loop-invariant, so compute them once.
    x_mean = np.mean(xs)
    y_mean = np.mean(ys)

    numerator = np.sum((xs - x_mean) * (ys - y_mean))
    dinominator = np.sum((xs - x_mean) ** 2)

    print("numerator:", numerator)
    print("dinominator:", dinominator)

    if dinominator == 0:
        raise ValueError("x has zero variance; slope is undefined")

    b1 = numerator / float(dinominator)
    # The least-squares line passes through (mean(x), mean(y)).
    b0 = y_mean - b1 * x_mean

    return b0, b1

def predict(x, b0, b1):
    """Evaluate the line with intercept ``b0`` and slope ``b1`` at ``x``."""
    slope_term = x * b1
    return b0 + slope_term

# Five (x, y) observations used to fit the regression line.
x = [1, 3, 2, 1, 3]
y = [14, 24, 18, 17, 27]

b0, b1 = fitSLR(x, y)

print("intercept:", b0, "slope:", b1)

# Predict at one new point.  x_test is passed instead of repeating the
# literal, so the query value is defined in exactly one place.
x_test = 6
y_test = predict(x_test, b0, b1)

print("y_test:", y_test)
示例#17
0
 def predict(self, input):
     """Convert ``input`` to a NumPy array and run ``libsvm.predict`` on it.

     The elements of ``self.model`` are unpacked as the remaining
     positional arguments of ``libsvm.predict``.
     """
     return libsvm.predict(np.array(input), *self.model)
示例#18
0
    'D:\ML\ML\CSR ML\WEEK#7\Machine Learning Assignment#6\Python\spamTrain.mat'
)
# Coerce the training data to float64; X is additionally made C-contiguous.
# NOTE(review): the original comments gave the shapes as 51X2 / 51X1, which
# looks copied from another exercise -- confirm against the .mat file.
X = np.require(svmtrain['X'], dtype=np.float64, requirements='C')
y = np.require(svmtrain['y'].flatten(), dtype=np.float64)

print('Training Linear SVM (Spam Classification)')
print('(this may take 1 to 2 minutes) ...')

# Fit a linear-kernel model and predict on the training data itself.
C = 0.1
model = libsvm.fit(X, y, kernel='linear', C=C)
p = libsvm.predict(X,
                   support=model[0],
                   SV=model[1],
                   nSV=model[2],
                   sv_coef=model[3],
                   intercept=model[4],
                   probA=model[5],
                   probB=model[6],
                   kernel='linear')

# Percentage of training predictions that equal the labels.
accuracyTrain = np.mean(p == y) * 100
print('Training accuracy', accuracyTrain)

# Test Spam Classification
# Raw string: the Windows path contains backslashes that are not valid
# escape sequences; the resulting path text is unchanged.
svmtest = loadmat(
    r'D:\ML\ML\CSR ML\WEEK#7\Machine Learning Assignment#6\Python\spamTest.mat')
Xtest = np.require(svmtest['Xtest'], dtype=np.float64,
                   requirements='C')
ytest = np.require(svmtest['ytest'].flatten(), dtype=np.float64)
示例#19
0
 # Show the word indices and summarise the feature vector derived from them.
 print('Word indices:\n%s' % word_indices)
 features = email_features(word_indices, vocabulary)
 print('Length of feature vector: %d' % len(features))
 print('Number of non-zero entries: %d' % sum(features > 0))
 # Train an SVM for spam classification.
 # Load the training .mat file; X is coerced to C-contiguous float64 and y
 # is flattened and coerced to float64 before being handed to libsvm.
 data = loadmat('../../octave/mlclass-ex6/spamTrain.mat')
 X = np.require(data['X'], dtype=np.float64, requirements='C_CONTIGUOUS')
 y = np.require(data['y'].flatten(), dtype=np.float64)
 C = 0.1
 # Fit with a linear kernel; the result is indexed below as model[0..7].
 model = libsvm.fit(X, y, kernel='linear', C=C)
 # Predict on the training data itself to measure training accuracy.
 predictions = libsvm.predict(
     X,
     support=model[0],
     SV=model[1],
     nSV=model[2],
     sv_coef=model[3],
     intercept=model[4],
     label=model[5],
     probA=model[6],
     probB=model[7],
     kernel='linear',
 )
 # Percentage of predictions that equal the training labels.
 accuracy = 100 * np.mean(predictions == y)
 print('Training set accuracy: %0.2f %%' % accuracy)
 # Load the test set with the same dtype/layout coercion as the training set.
 # Note that ``data`` is rebound to the test file's contents here.
 data = loadmat('../../octave/mlclass-ex6/spamTest.mat')
 Xtest = np.require(data['Xtest'], dtype=np.float64, requirements='C_CONTIGUOUS')
 ytest = np.require(data['ytest'].flatten(), dtype=np.float64)
 print('Evaluating the trained Linear SVM on a test set ...')
 predictions = libsvm.predict(
     Xtest,
     support=model[0],
示例#20
0
def classify(features, model):
    """Vectorize ``features`` and return the first predicted label as int.

    A new ``DictVectorizer`` is fitted on ``features``, the result is
    densified, and ``libsvm.predict`` is called with ``model`` unpacked as
    its remaining positional arguments.

    NOTE(review): the vectorizer is fitted at prediction time, so its
    column layout depends only on ``features`` -- confirm this matches the
    layout the model was trained with.
    """
    dense_matrix = DictVectorizer().fit_transform(features).toarray()
    predicted = libsvm.predict(dense_matrix, *model)
    return int(predicted[0])