import matplotlib.pyplot as plt

import models


def test():
    """Compare Perceptron, SVM, and LDA accuracy across training batch sizes."""
    tested_models = [
        TestedModel('Perceptron', models.Perceptron()),
        TestedModel('SVM', models.SVM()),
        TestedModel('LDA', models.LDA()),
    ]
    k = 10000         # test-set size
    iterations = 500  # repetitions per training size

    for m in MS:  # MS: training batch sizes to evaluate
        for _ in range(iterations):
            X, y = sample_d(m)      # draw a training batch of size m
            X_t, y_t = sample_d(k)  # draw a fresh test set
            for tested in tested_models:
                tested.model.fit(X, y)
                score = tested.model.score(X_t, y_t)
                tested.add_accuracy(m, score['accuracy'])

    # Plot accuracy against training batch size for each model.
    plt.figure()
    for tested in tested_models:
        plt.plot(MS, [tested.accuracy[m] for m in MS],
                 marker='.', label=tested.name)
    plt.legend()
    plt.title('Training batch size vs. accuracy')
    plt.xlabel('m')
    plt.ylabel('accuracy')
    plt.show()

    for tested in tested_models:
        print(tested.name, tested.accuracy)
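
# test() relies on a TestedModel helper, a sample_d(n) sampler, and an MS list
# of training sizes, none of which appear in this excerpt. Below is a minimal
# sketch of a TestedModel consistent with how it is used above (it accumulates
# a running mean accuracy per training size m); sample_d and MS are assumed to
# be defined elsewhere in the repo, and this is not necessarily the repo's
# actual implementation.
from collections import defaultdict


class TestedModel:
    def __init__(self, name, model):
        self.name = name
        self.model = model
        self._scores = defaultdict(list)  # m -> list of observed accuracies
        self.accuracy = {}                # m -> mean accuracy so far

    def add_accuracy(self, m, acc):
        self._scores[m].append(acc)
        self.accuracy[m] = sum(self._scores[m]) / len(self._scores[m])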
def main(args):
    """Fit a model's parameters given the parameters specified in args."""
    # Load the data as a document-term matrix plus vocabulary.
    X, vocab = build_dtm(args.datadir, args.num_documents, args.max_words)

    # Build the inference method.
    inference = models.MeanFieldVariationalInference(
        num_topics=args.num_topics,
        num_docs=X.shape[0],
        num_words=X.shape[1],
        alpha=args.alpha,
        beta=args.beta,
        epsilon=args.epsilon)

    # Run the inference method.
    model = models.LDA(inference=inference)
    model.fit(X=X,
              iterations=args.num_vi_iterations,
              estep_iterations=args.num_estep_iterations)

    # Predict topic assignments for words in the corpus.
    preds = model.predict(vocab=vocab, K=args.top_k)

    # Output model predictions, one per line.
    with open(args.predictions_file, 'w') as file:
        for pred in preds:
            file.write(pred + '\n')
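
# A minimal sketch of the argument parser this main() expects. The flag names
# mirror the attributes read above (args.datadir, args.num_topics, ...), but
# the defaults are assumptions for illustration, not values from the repo.
import argparse


def get_args():
    parser = argparse.ArgumentParser(description='LDA via mean-field VI')
    parser.add_argument('--datadir', type=str, required=True)
    parser.add_argument('--num-documents', type=int, default=1000)
    parser.add_argument('--max-words', type=int, default=10000)
    parser.add_argument('--num-topics', type=int, default=10)
    parser.add_argument('--alpha', type=float, default=0.1)
    parser.add_argument('--beta', type=float, default=0.1)
    parser.add_argument('--epsilon', type=float, default=1e-4)
    parser.add_argument('--num-vi-iterations', type=int, default=50)
    parser.add_argument('--num-estep-iterations', type=int, default=10)
    parser.add_argument('--top-k', type=int, default=10)
    parser.add_argument('--predictions-file', type=str, default='preds.txt')
    return parser.parse_args()


if __name__ == '__main__':
    main(get_args())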
def main(args):
    """Fit a model's parameters given the parameters specified in args."""
    # Load the data as a document-term matrix plus vocabulary.
    X, vocab = build_dtm(args.data, args.num_documents)

    # Build the requested inference method.
    if args.inference.lower() == 'gibbs-sampling':
        inference = models.GibbsSampling(num_topics=args.num_topics,
                                         num_docs=X.shape[0],
                                         num_words=X.shape[1],
                                         alpha=args.alpha,
                                         beta=args.beta)
    elif args.inference.lower() == 'sum-product':
        inference = models.SumProduct(num_topics=args.num_topics,
                                      num_docs=X.shape[0],
                                      num_words=X.shape[1],
                                      num_nonzero=X.nnz,
                                      alpha=args.alpha,
                                      beta=args.beta)
    else:
        raise ValueError(
            'The method given by --inference is not yet supported.')

    # Run the inference method.
    model = models.LDA(inference=inference)
    model.fit(X=X, iterations=args.iterations)

    # Predict topic assignments for words in the corpus.
    preds = model.predict(vocab=vocab, K=args.top_k)

    # Output model predictions, one per line.
    with open(args.predictions_file, 'w') as file:
        for pred in preds:
            file.write(pred + '\n')
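
# Example invocation of this entry point. The script name and exact flag
# spellings are assumptions inferred from the attributes read above
# (args.data, args.inference, args.iterations, ...), not taken from the repo:
#
#   python run_lda.py --inference gibbs-sampling --data corpus/ \
#       --num-documents 1000 --num-topics 20 --alpha 0.1 --beta 0.1 \
#       --iterations 100 --top-k 10 --predictions-file preds.txt
#
# Passing --inference sum-product selects the second branch; any other value
# raises the ValueError above.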
import numpy as np
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.svm import LinearSVC

import models

# Reduce dimensionality with PCA; fit on the training data only, then apply
# the same learned projection to the test data (refitting on the test set
# would produce an inconsistent projection).
new_data2 = pca.fit_transform(data)
new_test_data2 = pca.transform(test_data)
print('pca dimension fit')

# Reduce dimensionality with truncated SVD in the same way.
new_data = svd.fit_transform(data)
new_test_data = svd.transform(test_data)
print('dimensions transformed')
print(new_data.shape)

lda_model = models.LDA()
s_lda = LinearDiscriminantAnalysis(solver='eigen')
# Fit on the SVD features so that scoring on new_test_data below is
# consistent; refitting on the PCA features (new_data2) would overwrite
# this fit.
s_lda.fit(new_data, y)
lda_model.fit(new_data, y)
print('model fit')

pred = lda_model.predict(new_test_data)
print(lda_model.score(np.array(pred), test_y))
print(s_lda.score(new_test_data, test_y))

clf = LinearSVC(random_state=0, tol=1e-5)
clf.fit(new_data, y)
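
# The pca and svd objects used above are not defined in this excerpt. A
# minimal sketch of how they could be constructed with scikit-learn follows;
# the component count of 100 is an assumption for illustration, not a value
# from the repo.
from sklearn.decomposition import PCA, TruncatedSVD

pca = PCA(n_components=100)
svd = TruncatedSVD(n_components=100, random_state=0)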
# Selecting the best models for classification.
# May 2019
import sys
sys.path.insert(0, '/Users/chirathhettiarachchi/tensorflow/clardia/preprocess')

import json
import warnings

import experiments as exp
import models

warnings.filterwarnings("ignore", category=DeprecationWarning)
warnings.filterwarnings("ignore", category=FutureWarning)
warnings.filterwarnings("ignore", category=RuntimeWarning)
warnings.filterwarnings("ignore", category=UserWarning)

# Read the selected features.
with open('../cache/selected_features.json') as f:
    data = json.load(f)
features = data['diabetes']
print("Selected Features: ", features)

x, y = exp.experiment_3(features)

# Hold out the last 30 samples for validation.
cutoff = x.shape[0] - 30
valx = x[cutoff:]
valy = y[cutoff:]
x = x[:cutoff]
y = y[:cutoff]

# Train the LDA model. Note: the original call passes the training data for
# both argument pairs, so the held-out split (valx, valy) goes unused.
model = models.LDA(x, y, x, y, 4)
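
# The script assumes ../cache/selected_features.json maps dataset names to
# lists of feature names. A hypothetical example of the expected shape (the
# feature names here are invented for illustration, not taken from the repo):
#
#   {
#     "diabetes": ["glucose", "bmi", "age"]
#   }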