from sklearn.neighbors import KNeighborsClassifier

from utils import data_utils, vectorize


class KNN:
    def __init__(self, vectorizer, data_fname='../../data/imdb.csv', n_neighbors=10):
        self.data_fname = data_fname
        self.n_neighbors = n_neighbors
        self.vectorizer = vectorizer

    def run(self):
        # Load the train/test split and turn the raw texts into feature vectors.
        X_train, X_test, y_train, y_test = data_utils.load_train_test_data(self.data_fname)
        train_features, test_features = self.vectorizer.feature_extraction(X_train, X_test)

        # Fit a k-nearest-neighbors classifier and report its test accuracy.
        neigh = KNeighborsClassifier(n_neighbors=self.n_neighbors)
        neigh.fit(train_features, y_train)
        print(neigh.score(test_features, y_test))


if __name__ == '__main__':
    vectorizer = vectorize.Vectorizer('BOW', ngram_range=(1, 1))
    model = KNN(vectorizer=vectorizer)
    model.run()
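# The KNN script above depends on two project helpers that are not shown in this
# excerpt: utils.data_utils.load_train_test_data and utils.vectorize.Vectorizer.
# The class below is a minimal, hypothetical sketch of how a 'BOW' vectorizer
# could satisfy the feature_extraction(X_train, X_test) call using scikit-learn's
# CountVectorizer; the name BOWVectorizerSketch and its internals are assumptions,
# not the project's actual implementation.
from sklearn.feature_extraction.text import CountVectorizer


class BOWVectorizerSketch:
    def __init__(self, ngram_range=(1, 1)):
        self.count_vec = CountVectorizer(ngram_range=ngram_range)

    def feature_extraction(self, X_train, X_test):
        # Fit the vocabulary on the training texts only, then reuse it for the
        # test texts so both splits live in the same feature space.
        train_features = self.count_vec.fit_transform(X_train)
        test_features = self.count_vec.transform(X_test)
        return train_features, test_features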
            if test_acc / X_test.shape[0] > max_acc:
                max_acc = test_acc / X_test.shape[0]
                step = i
            else:
                stop_num += 1
                if stop_num == self.para['stop_num']:
                    break
        print('Best Performance: %.3f at Epoch %d' % (max_acc, step))


if __name__ == '__main__':
    vectorizer = vectorize.Vectorizer(
        'Word2Vec',
        emb_fname='/data/linpq/Word2Vec/glove.840B.300d.txt',
        word_index_fname='../../data/imdb_word_index.json')
    para = {
        'learning_rate': 0.001,
        'l2_reg': 1e-5,
        'hidden_size': 300,
        'max_seq_len': 500,
        'num_layers': 1,
        'epoch_num': 100,
        'batch_size': 200,
        'stop_num': 10
    }
    classifier = Classifier(vectorizer=vectorizer, para=para)
    classifier.run()
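# The 'Word2Vec' vectorizer above is built from a pretrained embedding file
# (GloVe 840B, 300d) and a word-index JSON, but its implementation is not part of
# this excerpt. As a rough, hypothetical illustration of how those two inputs are
# typically combined, the helper below builds an embedding matrix indexed by a
# Keras-style 1-based word index; build_embedding_matrix is an assumed name, not
# the project's code.
import json

import numpy as np


def build_embedding_matrix(emb_fname, word_index_fname, emb_dim=300):
    with open(word_index_fname) as f:
        word_index = json.load(f)  # token -> integer id, assumed to start at 1

    # Row 0 is left for padding; words missing from the embedding file stay zero.
    matrix = np.zeros((len(word_index) + 1, emb_dim), dtype=np.float32)
    with open(emb_fname, encoding='utf-8') as f:
        for line in f:
            # rsplit keeps multi-token words (which occur in GloVe 840B) intact.
            parts = line.rstrip().rsplit(' ', emb_dim)
            word, values = parts[0], parts[1:]
            if word in word_index and len(values) == emb_dim:
                matrix[word_index[word]] = np.asarray(values, dtype=np.float32)
    return matrix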
                  test_acc / X_test.shape[0]))
            if test_acc / X_test.shape[0] > max_acc:
                max_acc = test_acc / X_test.shape[0]
                step = i
            else:
                stop_num += 1
                if stop_num == self.para['stop_num']:
                    break
        print('Best Performance: %.3f at Epoch %d' % (max_acc, step))


if __name__ == '__main__':
    vectorizer = vectorize.Vectorizer(
        'Word2Vec',
        emb_fname='../../data/imdb.vec',
        word_index_fname='../../data/imdb_word_index.json')
    para = {
        'learning_rate': 0.001,
        'l2_reg': 1e-4,
        'hidden_size': 300,
        'max_seq_len': 500,
        'epoch_num': 100,
        'batch_size': 200,
        'stop_num': 10
    }
    classifier = Classifier(vectorizer=vectorizer, para=para)
    classifier.run()
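# Both fragments above end with the same early-stopping pattern: keep the best
# test accuracy seen so far and stop once para['stop_num'] consecutive epochs pass
# without a new best. The full training loop is not included in this excerpt; the
# function below is a framework-agnostic sketch of the loop those fragments
# terminate. train_one_epoch and evaluate are hypothetical callables, not names
# taken from the project.
def run_with_early_stopping(train_one_epoch, evaluate, n_test, epoch_num, patience):
    max_acc, step, stop_num = 0.0, 0, 0
    for i in range(epoch_num):
        train_one_epoch(i)           # one pass over the training batches
        test_acc = evaluate()        # number of correctly classified test examples
        acc = test_acc / n_test      # normalize the count to an accuracy in [0, 1]
        if acc > max_acc:
            max_acc, step = acc, i   # remember the best epoch seen so far
        else:
            stop_num += 1            # another epoch without improvement
            if stop_num == patience:
                break                # patience exhausted: stop training early
    print('Best Performance: %.3f at Epoch %d' % (max_acc, step))
    return max_acc, step

# A Classifier.run() built on this skeleton would supply its own per-epoch training
# and evaluation routines along with X_test.shape[0], para['epoch_num'], and
# para['stop_num'] as the remaining arguments.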