Example #1
import sys

import numpy as np
import svmpy


def example(num_samples=8, num_features=2, grid_size=20, filename="example2.pdf"):
    # Flattened training vectors; put your own input vectors here.
    l = [2.0, 1.0, 2.0, 2.0, 3.0, 2.0, 1.0, 3.0,
         0.0, 1.0, -1.0, -1.0, -1.0, -2.0, -2.0, -1.0]
    lmat = np.asarray(l)
    lab = [1.0, 1.0, 1.0, 1.0, -1.0, -1.0, -1.0, -1.0]  # put your labels here
    llab = np.asarray(lab)

    # Reshape the flat arrays into a (num_samples x num_features) training
    # matrix and a (num_samples x 1) label matrix.
    samples = np.matrix(lmat.reshape(num_samples, num_features))
    labels = np.matrix(llab.reshape(num_samples, 1))

    trainer = svmpy.SVMTrainer(svmpy.Kernel.linear(), 0.1)  # linear kernel
    predictor = trainer.train(samples, labels)

    plot(predictor, samples, labels, grid_size, filename)

    # Predict on user-supplied input.
    print("Training performed; predict!")
    i = input("Space-separated coordinates of prediction vector: ")
    try:
        s = i.split()
        x = [float(s[0]), float(s[1])]
    except (IndexError, ValueError):
        sys.exit(0)
    try:
        xmat = np.asarray(x)
        xp = np.matrix(xmat.reshape(1, num_features))  # reshape the prediction input
        print(predictor.predict(xp))
    except Exception:
        sys.exit(0)
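# A minimal non-interactive sketch of the same prediction step (assumes, as
# above, that the trained predictor accepts a 1 x num_features matrix; the
# helper name and sample points are made up for illustration):
def predict_points(predictor, points, num_features=2):
    for p in points:
        xp = np.matrix(np.asarray(p).reshape(1, num_features))
        print(p, "->", predictor.predict(xp))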
Example #2
def example(num_samples=10, num_features=2, grid_size=20, filename="svm.pdf"):
    samples = np.matrix(
        np.random.normal(size=num_samples * num_features).reshape(
            num_samples, num_features))
    labels = 2 * (samples.sum(axis=1) > 0) - 1.0
    trainer = svmpy.SVMTrainer(svmpy.Kernel.linear(), 0.1)
    predictor = trainer.train(samples, labels)

    plot(predictor, samples, labels, grid_size, filename)
Example #3
def trainSVM(feature_samples, label_samples):

    samples = feature_samples
    labels = label_samples

    trainer = svmpy.SVMTrainer(svmpy.Kernel.linear(), 0.01)
    predictor = trainer.train(samples, labels)

    return predictor
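# Usage sketch for trainSVM (hypothetical toy data, following the matrix
# shapes used in Example #1; requires numpy as np and svmpy as imported there):
feats = np.matrix([[2.0, 1.0], [2.0, 2.0], [-1.0, -1.0], [-2.0, -2.0]])
labs = np.matrix([[1.0], [1.0], [-1.0], [-1.0]])
predictor = trainSVM(feats, labs)
print(predictor.predict(np.matrix([[1.5, 1.5]])))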
Example #4
def example(num_samples=10, num_features=2, grid_size=20, filename="svm.pdf"):
    samples = np.matrix(np.random.normal(size=num_samples * num_features)  # training matrix
                        .reshape(num_samples, num_features))  # reshape to (num_samples, num_features)
    print(type(samples))
    labels = 2 * (samples.sum(axis=1) > 0) - 1.0
    print(labels)
    trainer = svmpy.SVMTrainer(svmpy.Kernel.linear(), 0.1) # linear kernel 
    predictor = trainer.train(samples, labels)

    plot(predictor, samples, labels, grid_size, filename)
Example #5
def trainSVM(feature_samples, label_samples):

    # Dataset dimensions and plot resolution; only needed if the plot()
    # call below is re-enabled.
    num_samples = 41
    num_features = 4
    grid_size = 20

    samples = feature_samples
    labels = label_samples

    trainer = svmpy.SVMTrainer(svmpy.Kernel.linear(), 0.01)
    predictor = trainer.train(samples, labels)
    #plot(predictor, samples, labels, grid_size, filename)

    return predictor
Example #6
def example(num_samples=10,
            num_features=784,
            grid_size=20,
            filename="svm.pdf"):
    # Random placeholder data, immediately replaced by the real train_x /
    # train_y, which are assumed to be loaded elsewhere.
    samples = np.matrix(
        np.random.normal(size=num_samples * num_features).reshape(
            num_samples, num_features))
    samples = train_x
    labels = 2 * (samples.sum(axis=1) > 0) - 1.0
    labels = train_y - 2  # shift the raw labels (assumed in {1, 3}) into {-1, +1}
    print("sample variance:", np.var(samples))

    trainer = svmpy.SVMTrainer(kernel.Kernel()._polykernel(120, 1), 0.1)
    predictor = trainer.train(samples, labels)
    predictAll(test_x, test_y, predictor)
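# predictAll is not shown above; a minimal sketch of what such a helper might
# do, assuming the per-row predict() interface from Example #1 and that
# test_x / test_y are array-like with one row (one label) per sample, already
# in the same {-1, +1} encoding as training:
def predictAll(test_x, test_y, predictor):
    correct = 0
    for i in range(test_x.shape[0]):
        xp = np.matrix(np.asarray(test_x[i]).reshape(1, -1))
        if predictor.predict(xp) == test_y[i]:
            correct += 1
    print("accuracy:", correct / test_x.shape[0])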
Example #7
def example(num_samples=10, num_features=2, grid_size=20, filename="svm.pdf"):
    samples = np.matrix(
        np.random.normal(size=num_samples * num_features).reshape(
            num_samples, num_features))
    print(samples.shape)
    print(samples)
    labels = 2 * (samples.sum(axis=1) > 0) - 1.0
    print(labels.shape)
    trainer = svmpy.SVMTrainer(svmpy.Kernel.linear(), 0.1)
    predictor = trainer.train(samples, labels)
    with open("predictor.pkl",
              'wb') as output:  # Overwrites any existing file.
        pickle.dump(predictor, output, pickle.HIGHEST_PROTOCOL)

    with open('predictor.pkl', 'rb') as input:
        predictor = pickle.load(input)

    plot(predictor, samples, labels, grid_size, filename)
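# Because the predictor is pickled above, it can be reused without
# retraining; a minimal sketch of a separate script doing so (the file name
# matches the one written above; the predict call follows Example #1):
import pickle
import numpy as np

with open('predictor.pkl', 'rb') as infile:
    saved_predictor = pickle.load(infile)
print(saved_predictor.predict(np.matrix([[0.5, -0.2]])))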
    def fit(self, corpus, sample_y):
        # Split the bag-of-words corpus into parallel lists of word indexes
        # and word counts, one row per document.
        word_indexes = []
        word_counts = []
        for row_corpus in corpus:
            row_indexes = []
            row_counts = []
            for w_i, w_c in row_corpus:
                row_indexes.append(w_i)
                row_counts.append(w_c)
            word_indexes.append(row_indexes)
            word_counts.append(row_counts)
        
        n_documents = len(word_indexes)    
        
        max_index = 0
        for d in range(n_documents):
            document_max = np.max(word_indexes[d])
            if max_index < document_max:
                max_index = document_max
                
        n_word_types = max_index + 1
        
        theta = np.random.uniform(size=(n_documents, self.n_topic))
        old_theta = np.copy(theta)
        phi = np.random.uniform(size=(self.n_topic, n_word_types))
        svm_alpha = np.random.uniform(size=n_documents) + 1e-3
        reg_weights = np.random.normal(size=self.n_topic)
        
        for n in range(self.n_iter):
            sum_phi = []
            for k in range(self.n_topic):
                sum_phi.append(sum(phi[k]))
            ndk = theta  # reused in place; each row is reset below before accumulating counts
            nkv = np.zeros((self.n_topic, n_word_types))
            
            sample_X = []
            for d in range(n_documents):
                n_word_in_doc = len(word_indexes[d])
                sum_theta_d = sum(theta[d])
                prob_d = digamma(theta[d]) - digamma(sum_theta_d)
                temp1 = svm_alpha[d] / n_word_in_doc * sample_y[d]
                
                ndk[d, :] = 0.
                dummies = np.array([0.] * self.n_topic)
                for w in range(n_word_in_doc):
                    temp2 = temp1 * reg_weights  # per-topic contribution of the regression weights
                    word_no = word_indexes[d][w]
                    prob_w = digamma(phi[:, word_no]) - digamma(sum_phi)
                    latent_z = np.exp(prob_w + prob_d + temp2)
                    latent_z /= np.sum(latent_z)
                    
                    ndk[d, :] += latent_z * word_counts[d][w]
                    nkv[:, word_no] += latent_z * word_counts[d][w]
                    
                    z = np.argmax(latent_z)
                    dummies[z] += 1.
                sample_X.append(dummies / len(word_indexes[d]))
                
            theta = ndk + self.alpha
            phi = nkv + self.beta
            print(n, np.max(theta - old_theta))
            old_theta = np.copy(theta)
            t1 = time.time()
            
            sample_X = np.array(sample_X)
            trainer = svmpy.SVMTrainer(svmpy.Kernel.linear(), 0.1)
            trainer.train(sample_X, sample_y.reshape(-1, 1))
            svm_alpha = trainer.lagrange_multipliers
            reg_weights = (svm_alpha * sample_y).T.dot(sample_X)
            self.b = (sample_y - sample_X.dot(reg_weights)).mean()
            print("svm train time:%.2fsec"%(time.time() - t1))
            self.reg_weights = reg_weights
            y_est = self.predict(sample_X)
            print("current accuracy", accuracy_score(y_est, sample_y))
            
        for k in range(self.n_topic):
            phi[k] = phi[k] / np.sum(phi[k])

        for d in range(n_documents):
            theta[d] = theta[d] / np.sum(theta[d])
            
        self.reg_weights = reg_weights
        return phi, theta, sample_X
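    # The predict method called inside fit() above is not shown; since
    # reg_weights and self.b define a linear decision function (see the SVM
    # step in the loop), a minimal sketch might be (an assumption, not the
    # original implementation):
    def predict(self, sample_X):
        scores = np.asarray(sample_X).dot(self.reg_weights) + self.b
        return np.where(scores > 0, 1.0, -1.0)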
    """
    stoplist = set('for a of the and to in'.split())
    texts = [[word for word in document.lower().split() if word not in stoplist]
             for document in train_text]

    dictionary = corpora.Dictionary(texts)
    dictionary.save('/tmp/deerwester.dict')
    corpus = [dictionary.doc2bow(text) for text in texts]
    
    slda = SLDA(n_topic, 100)
    phi, theta, sample_X = slda.fit(corpus, train_target)
    y_est = slda.predict(sample_X)
    print(y_est)
    print("accuracy", accuracy_score(y_est, train_target))
    #print(theta) 
     
    lda = vb.LDA(n_topic, 100)
    phi, theta, sample_X_lda = lda.fit(corpus)  # unsupervised LDA for comparison; kept separate from sample_X
    #svm = SVM(svm_alpha, n_iter=20000)
    #reg_weights, self.b, svm_alpha = svm.fit(sample_X, sample_y)
    
    trainer = svmpy.SVMTrainer(svmpy.Kernel.linear(), 0.1)
    predictor = trainer.train(sample_X, train_target.reshape(-1, 1))
    y_est = []
    for x in sample_X:
        y_est.append(predictor.predict(x))
    print("accuracy", accuracy_score(y_est, train_target))
    
    np.set_printoptions(precision=3, suppress=True)