Example #1
def _test_svm():
    import time

    import matplotlib.pyplot as plt
    import numpy as np

    import misc
    import svm

    # Global variables
    global X_train
    global X_val
    global X_test
    global y_train
    global y_val
    global y_test

    # Train the SVM (clf avoids shadowing the svm module)
    clf = svm.LinSVM()
    tic = time.time()
    losses = clf.train_sgd(X_train, y_train, eta=1e-7, reg=5e4, epochs=1500, s=250, verbose=True)
    toc = time.time()
    print("that took %fs" % (toc - tic))

    # Plot the training loss
    plt.plot(losses)
    plt.xlabel("Iteration number")
    plt.ylabel("Loss value")
    plt.show()

    # Get the training and validation accuracy
    y_train_pred = clf.predict(X_train)
    print("training accuracy: %f" % np.mean(y_train == y_train_pred))
    y_val_pred = clf.predict(X_val)
    print("validation accuracy: %f" % np.mean(y_val == y_val_pred))

    # Visualize the weights
    classes = [str(i) for i in range(clf.W.shape[0])]
    misc.viz_weights_categorical(clf.W, classes, (28, 28))
Example #2
    def testBayesError(self):
        dataDir = PathDefaults.getDataDir() + "modelPenalisation/toy/"
        data = numpy.load(dataDir + "toyData.npz")
        gridPoints, X, y, pdfX, pdfY1X, pdfYminus1X = data["arr_0"], data["arr_1"], data["arr_2"], data["arr_3"], data["arr_4"], data["arr_5"]

        sampleSize = 100
        trainX, trainY = X[0:sampleSize, :], y[0:sampleSize]
        testX, testY = X[sampleSize:, :], y[sampleSize:]

        # We form a test set from the grid points
        gridX = numpy.zeros((gridPoints.shape[0]**2, 2))
        for m in range(gridPoints.shape[0]):
            gridX[m*gridPoints.shape[0]:(m+1)*gridPoints.shape[0], 0] = gridPoints
            gridX[m*gridPoints.shape[0]:(m+1)*gridPoints.shape[0], 1] = gridPoints[m]

        Cs = 2**numpy.arange(-5, 5, dtype=float)
        gammas = 2**numpy.arange(-5, 5, dtype=float)

        bestError = 1 

        for C in Cs:
            for gamma in gammas:
                svm = LibSVM(kernel="gaussian", C=C, kernelParam=gamma)
                svm.learnModel(trainX, trainY)
                predY, decisionsY = svm.predict(gridX, True)
                decisionGrid = numpy.reshape(decisionsY, (gridPoints.shape[0], gridPoints.shape[0]), order="F")
                error = ModelSelectUtils.bayesError(gridPoints, decisionGrid, pdfX, pdfY1X, pdfYminus1X)

                predY, decisionsY = svm.predict(testX, True)
                error2 = Evaluator.binaryError(testY, predY)
                print(error, error2)

                if error < bestError:
                    bestError = error
                    bestC = C
                    bestGamma = gamma

        svm = LibSVM(kernel="gaussian", C=bestC, kernelParam=bestGamma)
        svm.learnModel(trainX, trainY)
        predY, decisionsY = svm.predict(gridX, True)

        plt.figure(0)
        plt.contourf(gridPoints, gridPoints, decisionGrid, 100)
        plt.colorbar()

        plt.figure(1)
        plt.scatter(X[y==1, 0], X[y==1, 1], c='r', label="+1")
        plt.scatter(X[y==-1, 0], X[y==-1, 1], c='b', label="-1")
        plt.legend()
        plt.show()
Example #3
def predict():
    if not request.json:
        flask.abort(400)

    data = {"success": False}
    
    imgbase64 = request.json['imgbase64']
    model_typ = request.json['model']
    start = time.process_time()
    if model_typ == 'cnn':   
        lp, roi, rmb = cnn.predict(cnn_model,str(imgbase64))
        if lp is not None:
            data['success'] = True
            data['lp'] = lp
            data['roi'] = roi
            data['rmb'] = rmb
    else:
        data['model'] = model_typ
        lp, roi, rmb = svm.predict(svm_model,str(imgbase64))
        if lp is not None:
            data['success'] = True
            data['lp'] = lp
            data['roi'] = roi
            data['rmb'] = rmb

    end = time.process_time() - start
    data['time'] = end
    
    return jsonify(data)
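
A hypothetical client call against this endpoint (the Flask route decorator is not shown in the snippet; the /predict path, host, and payload values are assumptions):

import requests

# Hypothetical request; adjust host, port, and path to the actual route.
resp = requests.post(
    "http://localhost:5000/predict",
    json={"imgbase64": "<base64-encoded image>", "model": "cnn"},
)
print(resp.json())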
Example #5
def do(m, dimension, n_components, FIX_INVERTED=True, FIX_RIGHT_LEFT=True, SAVE=True, n_components_min=0):
    # m = 1000
    # dimension = 256
    (images, y) = pre_process.extract(m, dimension, FIX_INVERTED, FIX_RIGHT_LEFT, SAVE)

    # n_components = 100
    # images_reduced = pca.fit_transform(m, dimension, images, n_components, SAVE, n_components_min)

    # (pred, svm_score) = svm.predict(m, dimension, images_reduced, y, SAVE)
    (pred, svm_score) = svm.predict(m, dimension, images, y, SAVE)

    # the first half of the data is the training split, the second half the test split
    kappa_score_train = quadratic_weighted_kappa(pred[:m // 2], y[:m // 2], min_rating=0, max_rating=4)
    kappa_score_test = quadratic_weighted_kappa(pred[m // 2:], y[m // 2:], min_rating=0, max_rating=4)
    kappa_score_all = quadratic_weighted_kappa(pred, y, min_rating=0, max_rating=4)

    print("kappa score for train: ", kappa_score_train)
    print("kappa score for test: ", kappa_score_test)
    print("kappa score for all data: ", kappa_score_all)
    print("svm score: ", svm_score)
Example #7
    def prediction(self):
        if self.lst is None:
            self.lst = []

        if self.extractor.get_blacklist():
            self.blacklist = True
        else:
            self.lst = self.extractor.get_features()

        print(self.myqueue.qsize())

        if self.blacklist:
            # result = [0, 0]
            result = 0
        else:
            data = list(self.lst)
            svm_prdt = svm.predict(data)
            # kclus_prdt = kclus.predict(data)
            # result = [svm_prdt[0], kclus_prdt[0]]
            result = svm_prdt[0]
        return result
Example #8
def predict_historical(data, days_ahead):
    '''
    Dummy PySpark function. Use as a wrapper and add all predictions here.

    data: dict object of the form:
    {
        symbol:
        date:
        open:
        high:
        low:
        close:
        volume:
    }
    These values are all unicode strings! Parse to int/float as needed.
    '''

    x = data["close"]
    symbol = data["symbol"]
    prediction = svm.predict(symbol, days_ahead, x)

    return prediction
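
A hypothetical input following the field list in the docstring; the ticker and values below are made up for illustration:

# Made-up values, matching the docstring's "all values are strings" note.
sample = {
    "symbol": "AAPL",
    "date": "2017-11-07",
    "open": "172.48",
    "high": "174.99",
    "low": "171.72",
    "close": "174.81",
    "volume": "24361485",
}
forecast = predict_historical(sample, days_ahead=5)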
Example #9
f = lambda w: svm.svm_loss_naive(w, x_dev, y_dev, 0.0)[0]
grad_numerical = grad_check_sparse(f, w, grad)

# Test the model
svm = LinearSVM()    # create the classifier; W is empty at this point
tic = time.time()
loss_hist = svm.train(x_train, y_train, learning_rate=1e-7, reg=2.5e4, num_iters=1500, verbose=True)    # the svm object now holds W
toc = time.time()
print('that took %fs' % (toc - tic))

plt.plot(loss_hist)
plt.xlabel('iteration number')
plt.ylabel('loss value')
plt.show()

# After training, keep the learned parameters, use them for prediction, and compute the accuracy
y_train_pred = svm.predict(x_train)
print('training accuracy: %f' % (np.mean(y_train == y_train_pred)))
y_val_pred = svm.predict(x_val)
print('validation accuracy: %f' % (np.mean(y_val == y_val_pred)))

# A dataset is usually split into a training set, a development (validation) set, and a test set. Besides
# validating training results, the validation set is also used for hyperparameter tuning: try every parameter
# combination, train an SVM, then measure accuracy on the validation set and keep the best model.
# Cross-validation is time-consuming: the code below loops 18 times and ran for over ten minutes at full
# load on a 9700K CPU, so decide for yourself whether to run it.

# Hyperparameter tuning (cross-validation)
learning_rates = [1.4e-7, 1.5e-7, 1.6e-7]
# list-comprehension shorthand for the 12 values
regularization_strengths = [(1 + i * 0.1) * 1e4 for i in range(-3, 3)] + [(2 + i * 0.1) * 1e4 for i in range(-3, 3)]
results = {}  # dictionary
best_val = -1
best_svm = None
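
The snippet ends where the search begins; a minimal sketch of the grid-search loop these variables set up, assuming the same LinearSVM API used above (train returning a loss history, predict returning labels):

for lr in learning_rates:
    for reg in regularization_strengths:
        model = LinearSVM()
        model.train(x_train, y_train, learning_rate=lr, reg=reg, num_iters=1500)
        # accuracy on both splits; model selection uses the validation split only
        train_accuracy = np.mean(y_train == model.predict(x_train))
        val_accuracy = np.mean(y_val == model.predict(x_val))
        results[(lr, reg)] = (train_accuracy, val_accuracy)
        if val_accuracy > best_val:
            best_val = val_accuracy
            best_svm = model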
Example #10
    X, _ = preprocess_data(train_data, None)
    clf = MultinomialNB()
    scores = []
    for metric in metrics:
        scores.append(cross_val_score(clf, X, y, cv=10, scoring=metric).mean())
    return scores


################################

if __name__ == "__main__":
    train_data = pd.read_csv(dataset_path + 'train_set.csv', sep="\t")
    test_data = pd.read_csv(dataset_path + 'test_set.csv', sep="\t")
    print "my_method: Loaded data."

    le = preprocessing.LabelEncoder()
    le.fit(train_data["Category"])
    y = le.transform(train_data["Category"])

    X, Test = preprocess_data(train_data, test_data)

    # Prediction:
    import svm
    Test_pred = le.inverse_transform(svm.predict(X, y, Test))

    with open("./testSet_categories.csv", "w+") as predFile:
        predFile.write("Id,Category\n")
        for i in range(len(Test_pred)):
            predFile.write(str(test_data['Id'][i]) + ',' + Test_pred[i] + '\n')
Example #11
File: test.py Project: mgno32/a4
        nh = FSIZE * h // w
        x = 0
        y = (FSIZE - nh) // 2
    else:
        nh = FSIZE
        nw = FSIZE * w // h
        y = 0
        x = (FSIZE - nw) // 2
    if nh < 1 or nw < 1:
        continue
    nz = cv2.resize(nz, (nw, nh))
    pz = np.zeros((FSIZE, FSIZE))
    #nz = np.pad(nz, ((1,1),(2,2)), "constant")
    pz[y:y + nh, x:x + nw] = nz

    pre = svm.predict(pz)
    py = minc[1]
    if lastY is not None and py - lastY > LINE_H:
        pres.append([])
        layer = pres[-1]
    lastY = py
    layer.append(pre)
    '''
    import os
    path = "/home/hal/Downloads/Final+Project/dataset/"
    name = path + "%s.png" % 0
    k = 0
    while os.path.exists(name): 
        k += 1
        name = path + "%s.png" % k
    '''
Example #12
#########################################
# Do classification to check the accuracy using the test set
#########################################
if silence == 0:
    print("Calculating SVM %s kernel and decision matrix" % kernel)

if cheat_test:
    # for handcrafted data, reduce algorithm complexity by reducing the number of support vectors
    supports = supports[0:size_limit, :]
    alpha_vector = alpha_vector[:, 0:size_limit]

decision, vote = svm.get_decision(X_test, kernel, coef, degree, alpha_vector,
                                  supports, intercept, num_classifiers)

if silence == 0: print("Classifying test set...")
y_manual = svm.predict(X_test, clf, class_type, classes, num_classifiers,
                       decision, vote)

#########################################
# Compare model accuracies (computed vs Python library output)
#########################################
y_pred = clf.predict(X_test)

print("Accuracy from predict function: %f" % accuracy_score(y_test, y_pred))
print("Accuracy from manual calculation: %f" %
      accuracy_score(y_test, y_manual))

sensitivity = sum(sum((y_manual.T == y_test) & (y_test == 1))) / sum(y_test)
specificity = sum(
    sum((y_manual.T == y_test) & (y_test == 0))) / (len(y_test) - sum(y_test))
print("Sensitivity: %f" % sensitivity)
print("Specificity: %f" % specificity)
Example #13
def training_loop(epochs,
                  grad_fn,
                  loss_fn,
                  w,
                  x,
                  y,
                  batch_size,
                  x_test=None,
                  y_test=None,
                  max_steps=None,
                  m=50,
                  eta=0.01,
                  **params):
    """

    :param epochs: number of epoch to do
    :param step_function: optimization algorithm
    :param w: svm weights
    :param x: x input (here mnist images)
    :param y: target classes
    :param batch_size: size of a minibatch
    :param x_test: x test
    :param y_test: y test
    :param params: parameters for the optimization algorithms
    :return:
    """

    # Given the batch size, count how many batches are needed to cover all the data
    n_batch = (x.shape[0] // batch_size)

    # used to shuffle the data at each epoch
    idx = np.arange(x.shape[0])

    # Training histories
    loss_records = []
    valloss_records = []
    valaccuracy_records = []
    accuracy_records = []

    parameters = None
    t = 1
    counter = 0

    wtilde = copy(w)

    for e in range(epochs):
        # shuffle the indices
        np.random.shuffle(idx)

        # Split the shuffled data into random batches
        x_batches = np.array_split(x[idx], n_batch)
        y_batches = np.array_split(y[idx], n_batch)

        # Build a list of (batch_x, batch_y) pairs
        batchs = list(zip(x_batches, y_batches))

        # Compute mu
        mu = 0

        # The batches define the gradient functions we will use: each \Psi_i is defined by (batch_x_i, batch_y_i).
        # grad_fn() takes the current weights, the x inputs, and the expected outputs; **params is just
        # Python plumbing.
        for i in range(n_batch):
            batch_x = batchs[i][0]
            batch_y = batchs[i][1]

            # Each gradient \Psi_i
            mu += grad_fn(w, batch_x, batch_y, **params)

        # The average
        mu /= n_batch

        # End of the mu computation

        for t in range(m):
            it = np.random.randint(n_batch)
            batch_x = batchs[it][0]
            batch_y = batchs[it][1]

            grad_1 = grad_fn(w, batch_x, batch_y, **params)
            grad_2 = grad_fn(wtilde, batch_x, batch_y, **params)

            w = svrg_update(w, grad_1 - grad_2, eta, mu)
            accuracy_records.append((predict(w, batch_x) == batch_y).mean())
            loss_records.append(loss_fn(w, batch_x, batch_y, **params))

        print(accuracy_records[-1])

        # if a test set was provided, evaluate the model on it
        if x_test is not None and y_test is not None:
            valloss_records.append(loss_fn(w, x_test, y_test, **params))
            valaccuracy_records.append((predict(w, x_test) == y_test).mean())

        # option I of SVRG: take the last iterate as the new snapshot
        wtilde = copy(w)

    # return the histories
    return loss_records, valloss_records, accuracy_records, valaccuracy_records
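
svrg_update itself is not part of this snippet; a minimal sketch consistent with the call above and the standard SVRG step w <- w - eta * (grad_i(w) - grad_i(wtilde) + mu):

def svrg_update(w, grad_diff, eta, mu):
    # grad_diff = grad_fn(w, batch) - grad_fn(wtilde, batch);
    # mu is the mean gradient over all batches at the epoch snapshot.
    return w - eta * (grad_diff + mu)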
Example #14
                    x, y, w, h = [v for v in f]  # unpack the bounding box
                    cropped_face = rotated_frame[y:y + h, x:x + w]  # img[y: y + h, x: x + w]
                    cropped_face = cv2.resize(cropped_face,
                                              DISPLAY_FACE_DIM,
                                              interpolation=cv2.INTER_AREA)

                    # Name Prediction
                    face_to_predict = cv2.resize(cropped_face,
                                                 FACE_DIM,
                                                 interpolation=cv2.INTER_AREA)
                    face_to_predict = cv2.cvtColor(face_to_predict,
                                                   cv2.COLOR_BGR2GRAY)
                    name_to_display = svm.predict(clf, pca, face_to_predict,
                                                  face_profile_names)

                    # Display frame
                    cv2.rectangle(rotated_frame, (x, y), (x + w, y + h),
                                  (0, 255, 0))
                    cv2.putText(rotated_frame, name_to_display, (x, y),
                                cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 255, 0))

                # rotate the frame back and trim the black paddings
                processed_frame = ut.trim(
                    ut.rotate_image(rotated_frame, rotation * (-1)),
                    frame_scale)

                # reset the optimized rotation map
                current_rotation_map = get_rotation_map(rotation)
Example #15
"""
Created on Tue Nov  7 17:23:15 2017

@author: Xie Yang
"""

import corpusProcess
import prepareVector
import pattern
import svm
import configparser

config = configparser.ConfigParser()
config.read('config.conf', encoding="utf8")
fileName = config['input']['fileName']
fn = fileName.split('.')[0]
filePath = config['input']['filePath']
mode = config['mode']['mode']

if mode == 'svm':
    corpusProcess.process(fileName, filePath)
    prepareVector.toVector(fileName, filePath)
    svm.predict("model/svmclf.clf", filePath, fileName)
elif mode == 'pattern':
    pattern.match(fileName, filePath)
elif mode == 'rf':
    pass
elif mode == 'ensemble':
    pass
else:
    print("Please enter a valid mode!")
Example #16
dataArr, labelArr = svm.loadDataSet("testSet.txt")
print("dataArr.shape:", np.shape(dataArr), "labelArr.shape:",
      np.shape(labelArr))

#b, alphas = svm.smoSimple(dataArr, labelArr, 0.6, 0.001, 40)

b, alphas = svm.smoP(dataArr, labelArr, 0.6, 0.001, 40)

print("b=", b)
# print("---")
print("alpha.shape:", alphas.shape)

print(alphas[alphas > 0])

# 支持向量
# for i in range(len(dataArr)):
#     if alphas[i] > 0.0:
#         print(dataArr[i], labelArr[i])

w = svm.calcWs(alphas, dataArr, labelArr)
#svm.plot_sv(dataArr, labelArr, w, b, alphas)

i = 5
y_ = svm.predict(dataArr[i], w, b)
print("y_:{}, y:{}".format(y_, labelArr[i]))

svm.eval(dataArr, labelArr, w, b)

svm.testRbf(1)

svm.testDigits(kTup=('rbf', 5))
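
calcWs recovers the primal weight vector from the dual solution, w = sum_i alpha_i * y_i * x_i; a minimal sketch of that computation (not necessarily this module's implementation):

import numpy as np

def calc_ws_sketch(alphas, data, labels):
    X = np.asarray(data)                 # (n, d) training points
    y = np.asarray(labels, dtype=float)  # (n,) labels in {-1, +1}
    a = np.asarray(alphas).ravel()       # (n,) dual coefficients
    return X.T @ (a * y)                 # w = sum_i alpha_i * y_i * x_i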
Example #17

label_test = {}
x_test = {}
ftest = open('../data/test', 'r')

line = ftest.readline()
itr = 0
max_len = 0
while line:
	itr = itr + 1
	email = line.split(' ')
	if email[1] == 'ham':
		label_test[itr] = -1
	else:
		label_test[itr] = 1
	i = 2
	arr = []
	while i < len(email) - 1:
		word = email[i]  # the word itself is unused; only its count is kept
		count = int(email[i + 1])
		arr.append(count)
		i += 2
	x_test[itr] = arr

	line = ftest.readline()

ftest.close()

[prediction, accuracy, values] = svm.predict(label_test, x_test, trained)
Example #18
if len(sys.argv) == 2:
    DATA_PATH = sys.argv[1]

    if not path.exists(DATA_PATH):
        print("\nError: There is no picture in this direction\n")
        exit()

    if not utils.check_image_format(DATA_PATH):
        print(
            "\nError: File extension has to be one of these: png, jpg, jpeg, pgm\n"
        )
        exit()

    FACE = cv2.imread(DATA_PATH, 0)
    PREDICTION_NAME = predict(FACE)
    print("This is picture of \"%s\"" % PREDICTION_NAME)
    exit()
elif len(sys.argv) > 2:
    print("\nError: Specify only one picture at a time\n")
    exit()

SKIP_FRAME = 2  # the fixed skip frame
FRAME_SKIP_RATE = 0  # skip SKIP_FRAME frames every other frame
SCALE_FACTOR = 2  # used to resize the captured frame for face detection for faster processing speed
CURRENT_ROTATION_MAP = utils.get_rotation_map(0)
WEBCAM = cv2.VideoCapture(0)
RET, FRAME = WEBCAM.read()  # get first frame
FRAME_SCALE = (int(FRAME.shape[1] / SCALE_FACTOR),
               int(FRAME.shape[0] / SCALE_FACTOR))  # (width, height)
CROPPED_FACE = []
Example #19
            # for f in faces:
            #     x, y, w, h = [ v*SCALE_FACTOR for v in f ] # scale the bounding box back to original frame size
            #     cv2.rectangle(frame, (x,y), (x+w,y+h), (0,255,0))
            #     cv2.putText(frame, "DumbAss", (x,y), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0,255,0))

            if len(faces):
                for f in faces:
                    # Crop out the face
                    x, y, w, h = [ v for v in f ] # unpack the bounding box
                    cropped_face = rotated_frame[y: y + h, x: x + w]   # img[y: y + h, x: x + w]
                    cropped_face = cv2.resize(cropped_face, DISPLAY_FACE_DIM, interpolation = cv2.INTER_AREA)

                    # Name Prediction
                    face_to_predict = cv2.resize(cropped_face, FACE_DIM, interpolation = cv2.INTER_AREA)
                    face_to_predict = cv2.cvtColor(face_to_predict, cv2.COLOR_BGR2GRAY)
                    name_to_display = svm.predict(clf, pca, face_to_predict, face_profile_names)

                    # Display frame
                    cv2.rectangle(rotated_frame, (x,y), (x+w,y+h), (0,255,0))
                    cv2.putText(rotated_frame, name_to_display, (x,y), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0,255,0))

                # rotate the frame back and trim the black paddings
                processed_frame = ut.trim(ut.rotate_image(rotated_frame, rotation * (-1)), frame_scale)

                # reset the optimized rotation map
                current_rotation_map = get_rotation_map(rotation)

                faceFound = True


                break