예제 #1
0
from sklearn.neighbors import KNeighborsClassifier
from utils import data_utils, vectorize


class KNN:
    """k-nearest-neighbours classifier driven by a pluggable vectorizer.

    Thin wrapper around scikit-learn's ``KNeighborsClassifier``: the data file
    is split by ``data_utils.load_train_test_data`` and converted to features
    by the supplied vectorizer before the classifier is fitted and scored.
    """

    def __init__(self, vectorizer, data_fname='../../data/imdb.csv', n_neighbors=10):
        """Store the configuration; nothing is loaded until :meth:`run`.

        Args:
            vectorizer: Object exposing ``feature_extraction(X_train, X_test)``
                that returns a ``(train_features, test_features)`` pair.
            data_fname: Path handed to ``data_utils.load_train_test_data``.
            n_neighbors: Forwarded to ``KNeighborsClassifier(n_neighbors=...)``.
        """
        self.vectorizer = vectorizer
        self.n_neighbors = n_neighbors
        self.data_fname = data_fname

    def run(self):
        """Fit on the training split and print the score on the test split.

        The printed value is whatever ``KNeighborsClassifier.score`` returns
        (mean accuracy, per the scikit-learn documentation). Nothing is
        returned to the caller.
        """
        splits = data_utils.load_train_test_data(self.data_fname)
        X_train, X_test, y_train, y_test = splits

        features = self.vectorizer.feature_extraction(X_train, X_test)
        train_features, test_features = features

        classifier = KNeighborsClassifier(n_neighbors=self.n_neighbors)
        classifier.fit(train_features, y_train)

        test_score = classifier.score(test_features, y_test)
        print(test_score)


if __name__ == '__main__':
    # Script entry point: build a 'BOW' vectorizer with ngram_range=(1, 1)
    # (presumably unigram bag-of-words -- see utils.vectorize) and run the
    # KNN experiment with its default data file and neighbour count.
    bow_vectorizer = vectorize.Vectorizer('BOW', ngram_range=(1, 1))
    KNN(vectorizer=bow_vectorizer).run()
예제 #2
0
            if test_acc / X_test.shape[0] > max_acc:
                max_acc = test_acc / X_test.shape[0]
                step = i
            else:
                stop_num += 1

            if stop_num == self.para['stop_num']:
                break

        print('Best Performance: %.3f at Epoch %d' % (max_acc, step))


if __name__ == '__main__':
    # Script entry point: configure a 'Word2Vec' vectorizer and run Classifier.
    # NOTE(review): emb_fname is an absolute path and looks machine-specific;
    # confirm it exists on the machine running this script.
    vectorizer_kwargs = {
        'emb_fname': '/data/linpq/Word2Vec/glove.840B.300d.txt',
        'word_index_fname': '../../data/imdb_word_index.json',
    }
    vectorizer = vectorize.Vectorizer('Word2Vec', **vectorizer_kwargs)

    # Hyper-parameters passed to Classifier as ``para``. The training loop
    # breaks once its non-improvement counter equals para['stop_num'].
    para = dict(
        learning_rate=0.001,
        l2_reg=1e-5,
        hidden_size=300,
        max_seq_len=500,
        num_layers=1,
        epoch_num=100,
        batch_size=200,
        stop_num=10,
    )

    classifier = Classifier(vectorizer=vectorizer, para=para)
    classifier.run()
예제 #3
0
                   test_acc / X_test.shape[0]))

            if test_acc / X_test.shape[0] > max_acc:
                max_acc = test_acc / X_test.shape[0]
                step = i
            else:
                stop_num += 1

            if stop_num == self.para['stop_num']:
                break

        print('Best Performance: %.3f at Epoch %d' % (max_acc, step))


if __name__ == '__main__':
    # Script entry point: configure a 'Word2Vec' vectorizer backed by the
    # project-relative embedding and word-index files, then run Classifier.
    vectorizer_kwargs = {
        'emb_fname': '../../data/imdb.vec',
        'word_index_fname': '../../data/imdb_word_index.json',
    }
    vectorizer = vectorize.Vectorizer('Word2Vec', **vectorizer_kwargs)

    # Hyper-parameters passed to Classifier as ``para``. The training loop
    # breaks once its non-improvement counter equals para['stop_num'].
    para = dict(
        learning_rate=0.001,
        l2_reg=1e-4,
        hidden_size=300,
        max_seq_len=500,
        epoch_num=100,
        batch_size=200,
        stop_num=10,
    )

    classifier = Classifier(vectorizer=vectorizer, para=para)
    classifier.run()