# NOTE(review): __author__ is assigned three times in a row; only the last
# value ("NLP-PC") is visible once the module has finished executing.
__author__ = "hs"
__author__ = "hs"
__author__ = "NLP-PC"

# Project-local modules. Several of these names are not used below; they are
# kept because other parts of the project may rely on them being imported.
import feature_generating
import classifiers
import analysis
from load_data import (
    load_train_data,
    load_processed_data,
    load_test_data,
    load_selected_data,
)
from save_data import dump_picle
from vectorizers import TFIDF_estimator, anew_estimator
from analysis import analysis_result
from classifiers import mNB

print("Start")

# The vectorizer is fitted on the training texts only; the test texts are
# mapped with the already-fitted vectorizer.
vectorizer = TFIDF_estimator()

texts, train_labels = load_selected_data(data_type="train")
transformed_train = vectorizer.fit_transform(texts)

testdata, true_labels = load_selected_data(data_type="test")
transformed_test = vectorizer.transform(testdata)

# mNB receives the training features, the training labels and the test
# features; its result is compared against the true test labels.
# (presumably a multinomial naive Bayes wrapper -- confirm in classifiers)
predict = mNB(transformed_train, train_labels, transformed_test)
analysis_result(predict, true_labels)
if count != 0: vec /= count return vec from sklearn.preprocessing import scale train_vecs = np.concatenate([buildWordVector(z, n_dim) for z in x_train]) if scaling == True: train_vecs = scale(train_vecs) # Train word2vec on test tweets # imdb_w2v.train(x_test) # Build test tweet vectors then scale test_vecs = np.concatenate([buildWordVector(z, n_dim) for z in x_test]) if scaling == True: test_vecs = scale(test_vecs) # scaling to [0, 1] interval min_max_scaler = MinMaxScaler() train_vecs = min_max_scaler.fit_transform(train_vecs) test_vecs = min_max_scaler.fit_transform(test_vecs) # Use classification algorithm (i.e. Stochastic Logistic Regression) on training set, then assess model performance on test set from classifiers import gNB, mNB from analysis import analysis_result pre = mNB(train_vecs, y_train, test_vecs) analysis_result(pre, y_test)
# NOTE(review): __author__ is assigned three times in a row; only the last
# value ('NLP-PC') is visible once the module has finished executing.
__author__ = 'hs'
__author__ = 'hs'
__author__ = 'NLP-PC'

# Project-local modules. Several of these names are not used below; they are
# kept because other parts of the project may rely on them being imported.
import feature_generating
import classifiers
import analysis
from load_data import (
    load_train_data,
    load_processed_data,
    load_test_data,
    load_selected_data,
)
from save_data import dump_picle
from vectorizers import TFIDF_estimator, anew_estimator
from analysis import analysis_result
from classifiers import mNB

print('Start')

# The vectorizer is fitted on the training texts only; the test texts are
# mapped with the already-fitted vectorizer.
vectorizer = TFIDF_estimator()

texts, train_labels = load_selected_data(data_type='train')
transformed_train = vectorizer.fit_transform(texts)

testdata, true_labels = load_selected_data(data_type='test')
transformed_test = vectorizer.transform(testdata)

# mNB receives the training features, the training labels and the test
# features; its result is compared against the true test labels.
# (presumably a multinomial naive Bayes wrapper -- confirm in classifiers)
predict = mNB(transformed_train, train_labels, transformed_test)
analysis_result(predict, true_labels)