def example(num_samples=8, num_features=2, grid_size=20, filename="example2.pdf"): l=[2.0,1.0,2.0,2.0,3.0,2.0,1.0,3.0,0.0,1.0,-1.0,-1.0,-1.0,-2.0,-2.0,-1.0] # here put your input training vectors lmat=np.asarray(l) lab=[1.0,1.0,1.0,1.0,-1.0,-1.0,-1.0,-1.0] # here put your labels llab=np.asarray(lab) samples = np.matrix(lmat # defining training matrix .reshape(num_samples, num_features)) # re-shaping the matrix labels = np.matrix(llab # defining training matrix .reshape(num_samples, 1)) # re-shaping the matrix trainer = svmpy.SVMTrainer(svmpy.Kernel.linear(), 0.1) # linear kernel predictor = trainer.train(samples, labels) plot(predictor, samples, labels, grid_size, filename) # predict print "training performed , predict !" i=raw_input("space seperated coordinates of prediction vector : ") try: s=i.split() x=[float(s[0]),float(s[1])] except: sys.exit(0) try: xmat=np.asarray(x) xp=np.matrix(xmat # defining training matrix .reshape(1, num_features)) # re-shaping the predict input print predictor.predict(xp) except: sys.exit(0)
def example(num_samples=10, num_features=2, grid_size=20, filename="svm.pdf"):
    """Fit a linear SVM to random Gaussian samples and plot the result.

    Draws ``num_samples`` points of dimension ``num_features`` from a
    standard normal, labels each point +1 when its feature sum is
    positive and -1 otherwise, trains a linear-kernel SVM (C=0.1), and
    writes the decision-boundary plot to ``filename``.
    """
    flat = np.random.normal(size=num_samples * num_features)
    samples = np.matrix(flat.reshape(num_samples, num_features))
    # +1 above the hyperplane sum(x) = 0, -1 below it.
    labels = 2 * (samples.sum(axis=1) > 0) - 1.0
    predictor = svmpy.SVMTrainer(svmpy.Kernel.linear(), 0.1).train(samples, labels)
    plot(predictor, samples, labels, grid_size, filename)
def trainSVM(feature_samples, label_samples):
    """Train a soft-margin linear SVM (C=0.01) on the given data.

    Parameters
    ----------
    feature_samples : training feature matrix
    label_samples   : corresponding label column

    Returns the fitted predictor object.
    """
    trainer = svmpy.SVMTrainer(svmpy.Kernel.linear(), 0.01)
    return trainer.train(feature_samples, label_samples)
def example(num_samples=10, num_features=2, grid_size=20, filename="svm.pdf"): samples = np.matrix(np.random.normal(size=num_samples * num_features) # defining training matrix .reshape(num_samples, num_features)) # re-shaping the matrix print type(samples) labels = 2 * (samples.sum(axis=1) > 0) - 1.0 print labels trainer = svmpy.SVMTrainer(svmpy.Kernel.linear(), 0.1) # linear kernel predictor = trainer.train(samples, labels) plot(predictor, samples, labels, grid_size, filename)
def trainSVM(feature_samples, label_samples):
    """Train a soft-margin linear SVM (C=0.01) and return the predictor.

    Parameters
    ----------
    feature_samples : training feature matrix
    label_samples   : corresponding label column

    Returns the fitted predictor object.
    """
    # FIX: removed the unused locals num_samples/num_features/grid_size and
    # the commented-out plot() call, which referenced an undefined `filename`.
    trainer = svmpy.SVMTrainer(svmpy.Kernel.linear(), 0.01)
    return trainer.train(feature_samples, label_samples)
def example(num_samples=10, num_features=784, grid_size=20, filename="svm.pdf"):
    """Train a polynomial-kernel SVM on the module-level dataset and
    evaluate it on the test split.

    NOTE(review): this function ignores its parameters for data selection —
    it reads the globals ``train_x``/``train_y`` and evaluates on
    ``test_x``/``test_y`` via ``predictAll``; confirm those globals exist
    at call time.
    """
    # FIX: the original first built random `samples` and sign-based `labels`
    # and then immediately overwrote both with train_x / train_y - 2; that
    # dead computation is removed.
    samples = train_x
    # Shift labels by -2; presumably remaps the raw label values onto a
    # +/-1 scheme — TODO confirm against how train_y is encoded.
    labels = train_y - 2
    print("sample variance :", np.var(samples))
    # Polynomial kernel of degree 120 with offset 1, C=0.1.
    trainer = svmpy.SVMTrainer(kernel.Kernel()._polykernel(120, 1), 0.1)
    predictor = trainer.train(samples, labels)
    predictAll(test_x, test_y, predictor)
def example(num_samples=10, num_features=2, grid_size=20, filename="svm.pdf"): samples = np.matrix( np.random.normal(size=num_samples * num_features).reshape( num_samples, num_features)) print samples.shape print samples labels = 2 * (samples.sum(axis=1) > 0) - 1.0 print labels.shape trainer = svmpy.SVMTrainer(svmpy.Kernel.linear(), 0.1) predictor = trainer.train(samples, labels) with open("predictor.pkl", 'wb') as output: # Overwrites any existing file. pickle.dump(predictor, output, pickle.HIGHEST_PROTOCOL) with open('predictor.pkl', 'rb') as input: predictor = pickle.load(input) plot(predictor, samples, labels, grid_size, filename)
def fit(self, curpus, sample_y):
    """Fit a supervised-LDA-style model by alternating variational topic
    updates with an SVM fit on the per-document topic assignments.

    Parameters
    ----------
    curpus : iterable of documents, each an iterable of (word_index,
        word_count) pairs (bag-of-words, gensim doc2bow style).
    sample_y : array of per-document labels, used both in the variational
        update and as SVM targets.

    Returns
    -------
    (phi, theta, sample_X) : row-normalized topic-word and document-topic
        matrices, plus the per-document empirical topic-proportion features
        fed to the SVM.

    Side effects: sets ``self.b`` and ``self.reg_weights``; prints progress
    each iteration.
    """
    # Unzip the (index, count) pairs into parallel per-document lists.
    word_indexes = []
    word_counts = []
    for row_curpus in curpus:
        row_indexes = []
        row_counts = []
        for w_i, w_c in row_curpus:
            row_indexes.append(w_i)
            row_counts.append(w_c)
        word_indexes.append(row_indexes)
        word_counts.append(row_counts)
    n_documents = len(word_indexes)
    # Vocabulary size = largest word index seen, plus one.
    max_index = 0
    for d in range(n_documents):
        document_max = np.max(word_indexes[d])
        if max_index < document_max:
            max_index = document_max
    n_word_types = max_index + 1
    # Random initialization of the variational parameters and SVM state.
    theta = np.random.uniform(size=(n_documents, self.n_topic))       # doc-topic
    old_theta = np.copy(theta)                                        # for convergence print
    phi = np.random.uniform(size=(self.n_topic, n_word_types))        # topic-word
    svm_alpha = np.random.uniform(size=n_documents) + 1e-3            # per-doc Lagrange multipliers
    reg_weights = np.random.normal(size=self.n_topic)                 # SVM weights over topics
    for n in range(self.n_iter):
        # Per-topic normalizers of phi for the digamma terms below.
        sum_phi = []
        for k in range(self.n_topic):
            sum_phi.append(sum(phi[k]))
        # NOTE(review): ndk aliases theta (no copy); rows are zeroed and
        # re-accumulated below, so theta is mutated in place each sweep.
        ndk = theta
        nkv = np.zeros((self.n_topic, n_word_types))
        sample_X = []
        for d in range(n_documents):
            n_word_in_doc = len(word_indexes[d])
            sum_theta_d = sum(theta[d])
            # E[log theta_d] under the variational Dirichlet.
            prob_d = digamma(theta[d]) - digamma(sum_theta_d)
            # Label-dependent tilt from the SVM dual variable for doc d.
            temp1 = svm_alpha[d] / n_word_in_doc * sample_y[d]
            ndk[d, :] = 0.
            # Hard topic-assignment counts used as SVM features for doc d.
            dummies = np.array([0.] * self.n_topic)
            for w in range(n_word_in_doc):
                # NOTE(review): `k` here is stale — it is the leftover value
                # from the earlier `for k in range(self.n_topic)` loop (always
                # the last topic), so every topic gets the same scalar tilt.
                # This looks like a bug (likely intended: the reg_weights
                # vector, per topic) — confirm before changing.
                temp2 = temp1 * reg_weights[k]
                word_no = word_indexes[d][w]
                # E[log phi_{:,word}] under the variational Dirichlet.
                prob_w = digamma(phi[:, word_no]) - digamma(sum_phi)
                # Variational posterior over topics for this word (softmax
                # of the log terms plus the supervised tilt).
                latent_z = np.exp(prob_w + prob_d + temp2)
                latent_z /= np.sum(latent_z)
                # Accumulate expected counts, weighted by word frequency.
                ndk[d, :] += latent_z * word_counts[d][w]
                nkv[:, word_no] += latent_z * word_counts[d][w]
                # Hard assignment for the SVM feature vector.
                z = np.argmax(latent_z)
                dummies[z] += 1.
            # Empirical topic proportions of doc d (by word positions,
            # not counts) become its SVM feature row.
            sample_X.append(dummies / len(word_indexes[d]))
        # Dirichlet posterior updates with symmetric priors alpha/beta.
        theta = ndk + self.alpha
        phi = nkv + self.beta
        # Progress: largest change in theta since the previous iteration.
        print(n, np.max(theta - old_theta))
        old_theta = np.copy(theta)
        t1 = time.time()
        # Refit the linear SVM on the fresh topic-proportion features.
        sample_X = np.array(sample_X)
        trainer = svmpy.SVMTrainer(svmpy.Kernel.linear(), 0.1)
        trainer.train(sample_X, sample_y.reshape(-1, 1))
        svm_alpha = trainer.lagrange_multipliers
        # Recover the primal weight vector and bias from the dual solution.
        reg_weights = (svm_alpha * sample_y).T.dot(sample_X)
        self.b = (sample_y - sample_X.dot(reg_weights)).mean()
        print("svm train time:%.2fsec"%(time.time() - t1))
        self.reg_weights = reg_weights
        # NOTE(review): `slda` is a module-level global, not self — this
        # method depends on the caller having bound slda to this instance.
        y_est = slda.predict(sample_X)
        print("current accuracy", accuracy_score(y_est, sample_y))
    # Normalize rows so phi/theta are proper probability distributions.
    for k in range(self.n_topic):
        phi[k] = phi[k] / np.sum(phi[k])
    for d in range(n_documents):
        theta[d] = theta[d] / np.sum(theta[d])
    self.reg_weights = reg_weights
    return phi, theta, sample_X
""" stoplist = set('for a of the and to in'.split()) texts = [[word for word in document.lower().split() if word not in stoplist] for document in train_text] dictionary = corpora.Dictionary(texts) dictionary.save('/tmp/deerwester.dict') corpus = [dictionary.doc2bow(text) for text in texts] slda = SLDA(n_topic, 100) phi, theta, sample_X = slda.fit(corpus, train_target) y_est = slda.predict(sample_X) print(y_est) print("accuracy", accuracy_score(y_est, train_target)) #print(theta) lda = vb.LDA(n_topic, 100) phi, theta, sampe_X = lda.fit(corpus) #svm = SVM(svm_alpha, n_iter=20000) #reg_weights, self.b, svm_alpha = svm.fit(sample_X, sample_y) trainer = svmpy.SVMTrainer(svmpy.Kernel.linear(), 0.1) predictor = trainer.train(sample_X, train_target.reshape(-1, 1)) y_est = [] for x in sample_X: y_est.append(predictor.predict(x)) print("accuracy", accuracy_score(y_est, train_target)) np.set_printoptions(precision=3, suppress=True)