__author__ = "hs"
__author__ = "hs"
__author__ = "NLP-PC"
import feature_generating
import classifiers
import analysis
from load_data import load_train_data, load_processed_data
from load_data import load_test_data
from save_data import dump_picle
from vectorizers import TFIDF_estimator, anew_estimator
from analysis import analysis_result
from classifiers import mNB
from load_data import load_selected_data

print("Start")

# Fit the TF-IDF estimator on the training texts only, then reuse the fitted
# estimator to project the test texts into the same feature space.
tfidf = TFIDF_estimator()
train_texts, train_labels = load_selected_data(data_type="train")
train_matrix = tfidf.fit_transform(train_texts)

test_texts, true_labels = load_selected_data(data_type="test")
test_matrix = tfidf.transform(test_texts)

# mNB receives the training matrix, training labels and the test matrix and
# returns the predictions for the test matrix (presumably a multinomial
# naive Bayes classifier -- confirm in classifiers.py).
predicted_labels = mNB(train_matrix, train_labels, test_matrix)

# Compare the predictions against the held-out labels.
analysis_result(predicted_labels, true_labels)
    if count != 0:
        vec /= count
    return vec


from sklearn.preprocessing import StandardScaler, scale

# Concatenate the arrays returned by buildWordVector for every item of x_train
# into a single training matrix (presumably one averaged word-vector row per
# document -- confirm against buildWordVector).
train_vecs = np.concatenate([buildWordVector(z, n_dim) for z in x_train])
if scaling:
    # StandardScaler().fit_transform(X) yields the same result as scale(X),
    # but keeps the fitted mean/std so the test matrix can be standardized
    # with the *training* statistics below instead of its own.
    standardizer = StandardScaler()
    train_vecs = standardizer.fit_transform(train_vecs)

# Build the test matrix the same way and apply the training statistics to it.
test_vecs = np.concatenate([buildWordVector(z, n_dim) for z in x_test])
if scaling:
    test_vecs = standardizer.transform(test_vecs)

# Rescale features to the [0, 1] interval. The scaler is fit on the training
# matrix only; re-fitting it on the test matrix (as the previous code did)
# maps train and test features through different transformations and leaks
# test-set statistics into the evaluation.
min_max_scaler = MinMaxScaler()
train_vecs = min_max_scaler.fit_transform(train_vecs)
# Test values outside the training range would fall outside [0, 1]; clip them
# so the features stay non-negative (presumably required by mNB if it is a
# multinomial naive Bayes model -- confirm in classifiers.py).
test_vecs = np.clip(min_max_scaler.transform(test_vecs), 0.0, 1.0)

# Fit the classifier on the training set and evaluate its predictions on the
# test set.
from classifiers import gNB, mNB
from analysis import analysis_result

pre = mNB(train_vecs, y_train, test_vecs)
analysis_result(pre, y_test)
    if count != 0:
        vec /= count
    return vec


from sklearn.preprocessing import StandardScaler, scale

# Concatenate the arrays returned by buildWordVector for every item of x_train
# into a single training matrix (presumably one averaged word-vector row per
# document -- confirm against buildWordVector).
train_vecs = np.concatenate([buildWordVector(z, n_dim) for z in x_train])
if scaling:
    # StandardScaler().fit_transform(X) yields the same result as scale(X),
    # but keeps the fitted mean/std so the test matrix can be standardized
    # with the *training* statistics below instead of its own.
    standardizer = StandardScaler()
    train_vecs = standardizer.fit_transform(train_vecs)

# Build the test matrix the same way and apply the training statistics to it.
test_vecs = np.concatenate([buildWordVector(z, n_dim) for z in x_test])
if scaling:
    test_vecs = standardizer.transform(test_vecs)

# Rescale features to the [0, 1] interval. The scaler is fit on the training
# matrix only; re-fitting it on the test matrix (as the previous code did)
# maps train and test features through different transformations and leaks
# test-set statistics into the evaluation.
min_max_scaler = MinMaxScaler()
train_vecs = min_max_scaler.fit_transform(train_vecs)
# Test values outside the training range would fall outside [0, 1]; clip them
# so the features stay non-negative (presumably required by mNB if it is a
# multinomial naive Bayes model -- confirm in classifiers.py).
test_vecs = np.clip(min_max_scaler.transform(test_vecs), 0.0, 1.0)

# Fit the classifier on the training set and evaluate its predictions on the
# test set.
from classifiers import gNB, mNB
from analysis import analysis_result

pre = mNB(train_vecs, y_train, test_vecs)
analysis_result(pre, y_test)
# Example #4
# 0
__author__ = 'hs'
__author__ = 'hs'
__author__ = 'NLP-PC'
import feature_generating
import classifiers
import analysis
from load_data import load_train_data, load_processed_data
from load_data import load_test_data
from save_data import dump_picle
from vectorizers import TFIDF_estimator, anew_estimator
from analysis import analysis_result
from classifiers import mNB
from load_data import load_selected_data

print('Start')

# Fit the TF-IDF estimator on the training texts only, then reuse the fitted
# estimator to project the test texts into the same feature space.
tfidf = TFIDF_estimator()
train_texts, train_labels = load_selected_data(data_type='train')
train_matrix = tfidf.fit_transform(train_texts)

test_texts, true_labels = load_selected_data(data_type='test')
test_matrix = tfidf.transform(test_texts)

# mNB receives the training matrix, training labels and the test matrix and
# returns the predictions for the test matrix (presumably a multinomial
# naive Bayes classifier -- confirm in classifiers.py).
predicted_labels = mNB(train_matrix, train_labels, test_matrix)

# Compare the predictions against the held-out labels.
analysis_result(predicted_labels, true_labels)