def pickle_labeled_sentiment_dataset():
    """
    Pickle the labeled sentiment dataset to disk.

    Obtains the dataset from get_labeled_sentiment_dataset() and writes it
    to 'data/pickles/sentiment_data_labeled_v1.pickle' under the project
    root. The file is opened in 'wb' mode, so an existing file is replaced.
    """
    labeled_dataset = get_labeled_sentiment_dataset()

    project_root = get_project_root()
    target_path = os.path.join(
        project_root, 'data/pickles/sentiment_data_labeled_v1.pickle')
    with open(target_path, 'wb') as out_file:
        pickle.dump(labeled_dataset, out_file)
Пример #2
0
def get_labeled_sentiment_data():
    """
    Read the pre-pickled, labeled sentiment data back from disk.

    :return: the object stored in
        'data/pickles/sentiment_data_labeled.pickle' under the project root.
    """
    root_dir = utils.get_project_root()
    source_path = os.path.join(
        root_dir, 'data/pickles/sentiment_data_labeled.pickle')
    with open(source_path, 'rb') as in_file:
        labeled_data = pickle.load(in_file)
    return labeled_data
def pickle_word_features(num_feat=5000):
    """
    Build the word-feature list from the training data and pickle it.

    The word frequency dict is computed from the default training data
    path, its keys are listed, and the first ``num_feat`` of them are
    written to 'data/pickles/word_features_5k.pickle' under the project
    root.

    NOTE(review): the keys are taken in the dict's own iteration order;
    whether that is "most frequent first" depends on get_word_freq_dict --
    confirm. The output filename stays '..._5k' whatever ``num_feat`` is.

    :param num_feat: number of leading keys to keep, defaulted to 5000.
    """
    training_path = utils.get_training_data_path()
    freq_by_word = get_word_freq_dict(training_path)
    all_words = list(freq_by_word.keys())
    selected_words = all_words[:num_feat]

    project_root = get_project_root()
    target_path = os.path.join(
        project_root, 'data/pickles/word_features_5k.pickle')
    with open(target_path, 'wb') as out_file:
        pickle.dump(selected_words, out_file)
def pickle_feature_sets():
    """
    Compute feature sets for the labeled dataset, shuffle and pickle them.

    Each (text, sentiment) pair from get_labeled_sentiment_dataset() is
    turned into (find_features(text), sentiment). The resulting list has
    the shape, e.g.:
    [({'good': True, 'silly': False, ...}, 'pos'),
    ({'good': False, 'silly': True, ...}, 'neg'), (...)]

    The list is shuffled in place with random.shuffle and then written to
    'data/pickles/feature_sets.pickle' under the project root.
    """
    labeled_dataset = get_labeled_sentiment_dataset()

    labeled_features = []
    for (text, sentiment) in labeled_dataset:
        labeled_features.append((find_features(text), sentiment))
    random.shuffle(labeled_features)

    project_root = get_project_root()
    target_path = os.path.join(project_root,
                               'data/pickles/feature_sets.pickle')
    with open(target_path, 'wb') as out_file:
        pickle.dump(labeled_features, out_file)
Пример #5
0
def get_word_features():
    """
    Load the word features from the existing pickled data.

    Takes no arguments; the feature count is whatever was stored in
    'data/pickles/word_features_5k.pickle' under the project root.

    :return: the unpickled object (per the original documentation, a list
        of top feature words from the training data).
    """
    root_dir = utils.get_project_root()
    source_path = os.path.join(
        root_dir, 'data/pickles/word_features_5k.pickle')
    with open(source_path, 'rb') as in_file:
        word_features = pickle.load(in_file)
    return word_features
def train_mnb_clf(training_set, testing_set):
    """
    Train a Multinomial NB classifier, report its accuracy and pickle it.

    Accuracy noted by the original author: 73.28

    :param training_set: data passed to the classifier's train().
    :param testing_set: data passed to classify.accuracy() for evaluation.
    """
    clf = SklearnClassifier(MultinomialNB())
    clf.train(training_set)

    accuracy_pct = (classify.accuracy(clf, testing_set)) * 100
    print("Multinomial NB Classifier accuracy:", accuracy_pct)

    # Persist the trained model under the project root.
    root_dir = utils.get_project_root()
    target_path = os.path.join(
        root_dir, 'data/classifiers/mnb_classifier_5k.pickle')
    with open(target_path, 'wb') as out_file:
        pickle.dump(clf, out_file)
Пример #7
0
def get_feature_sets():
    """
    Read the pre-pickled feature sets from
    'data/pickles/feature_sets.pickle' under the project root.

    Per the original documentation, the stored structure is a list of
    tuples, e.g.
    [({'good': True, 'silly': False, ...}, 'pos'),
    ({'good': False, 'silly': True, ...}, 'neg'), (...)]

    :return: the unpickled feature sets.
    """
    root_dir = utils.get_project_root()
    source_path = os.path.join(root_dir,
                               'data/pickles/feature_sets.pickle')
    with open(source_path, 'rb') as in_file:
        feature_sets = pickle.load(in_file)
    return feature_sets
def train_linear_svc_clf(training_set, testing_set):
    """
    Train a LinearSVC classifier, report its accuracy and pickle it.

    Accuracy noted by the original author: 72.01

    :param training_set: data passed to the classifier's train().
    :param testing_set: data passed to classify.accuracy() for evaluation.
    """
    clf = SklearnClassifier(LinearSVC())
    clf.train(training_set)

    accuracy_pct = (classify.accuracy(clf, testing_set)) * 100
    print("LinearSVC Classifier accuracy:", accuracy_pct)

    # Persist the trained model under the project root.
    root_dir = utils.get_project_root()
    target_path = os.path.join(
        root_dir, 'data/classifiers/linear_svc_classifier_5k.pickle')
    with open(target_path, 'wb') as out_file:
        pickle.dump(clf, out_file)
def train_bernoulli_nb_clf(training_set, testing_set):
    """
    Train a Bernoulli NB classifier, report its accuracy and pickle it.

    Accuracy noted by the original author: 74.64

    :param training_set: data passed to the classifier's train().
    :param testing_set: data passed to classify.accuracy() for evaluation.
    """
    clf = SklearnClassifier(BernoulliNB())
    clf.train(training_set)

    accuracy_pct = (classify.accuracy(clf, testing_set)) * 100
    print("Bernoulli NB Classifier accuracy:", accuracy_pct)

    # Persist the trained model under the project root.
    root_dir = utils.get_project_root()
    target_path = os.path.join(
        root_dir, 'data/classifiers/bernoulli_nb_classifier_5k.pickle')
    with open(target_path, 'wb') as out_file:
        pickle.dump(clf, out_file)
def train_naive_bayes_clf(training_set, testing_set):
    """
    Train a Naive Bayes classifier, report on it and pickle it.

    Prints the accuracy on ``testing_set`` and then shows the 15 most
    informative features before saving the model.

    Accuracy noted by the original author: 74.26

    :param training_set: data passed to NaiveBayesClassifier.train().
    :param testing_set: data passed to classify.accuracy() for evaluation.
    """
    clf = NaiveBayesClassifier.train(training_set)

    accuracy_pct = (classify.accuracy(clf, testing_set)) * 100
    print('Naive Bayes model accuracy:', accuracy_pct)

    clf.show_most_informative_features(15)

    # Persist the trained model under the project root.
    root_dir = utils.get_project_root()
    target_path = os.path.join(
        root_dir, 'data/classifiers/naive_bayes_5k.pickle')
    with open(target_path, 'wb') as out_file:
        pickle.dump(clf, out_file)
def train_logistic_regression_clf(training_set, testing_set):
    """
    Train a Logistic Regression classifier, report its accuracy and
    pickle it.

    Accuracy noted by the original author: 74.59

    :param training_set: data passed to the classifier's train().
    :param testing_set: data passed to classify.accuracy() for evaluation.
    """
    clf = SklearnClassifier(LogisticRegression())
    clf.train(training_set)

    accuracy_pct = (classify.accuracy(clf, testing_set)) * 100
    print('Logistic Regression Classifier accuracy:', accuracy_pct)

    # Persist the trained model under the project root.
    root_dir = utils.get_project_root()
    target_path = os.path.join(
        root_dir,
        'data/classifiers/logistic_regression_classifier_5k.pickle')
    with open(target_path, 'wb') as out_file:
        pickle.dump(clf, out_file)
def load_naive_bayes_clf():
    """
    Load the pickled Naive Bayes classifier.

    :return: the object stored in
        'data/classifiers/naive_bayes_5k.pickle' under the project root.
    """
    root_dir = utils.get_project_root()
    clf_path = os.path.join(root_dir,
                            'data/classifiers/naive_bayes_5k.pickle')
    with open(clf_path, 'rb') as in_file:
        clf = pickle.load(in_file)
    return clf
def load_linear_svc_clf():
    """
    Load the pickled LinearSVC classifier.

    :return: the object stored in
        'data/classifiers/linear_svc_classifier_5k.pickle' under the
        project root.
    """
    root_dir = utils.get_project_root()
    clf_path = os.path.join(
        root_dir, 'data/classifiers/linear_svc_classifier_5k.pickle')
    with open(clf_path, 'rb') as in_file:
        clf = pickle.load(in_file)
    return clf
def load_logistic_regression_clf():
    """
    Load the pickled Logistic Regression classifier.

    :return: the object stored in
        'data/classifiers/logistic_regression_classifier_5k.pickle' under
        the project root.
    """
    root_dir = utils.get_project_root()
    clf_path = os.path.join(
        root_dir,
        'data/classifiers/logistic_regression_classifier_5k.pickle')
    with open(clf_path, 'rb') as in_file:
        clf = pickle.load(in_file)
    return clf
def load_bernoulli_nb_clf():
    """
    Load the pickled Bernoulli NB classifier.

    :return: the object stored in
        'data/classifiers/bernoulli_nb_classifier_5k.pickle' under the
        project root.
    """
    root_dir = utils.get_project_root()
    clf_path = os.path.join(
        root_dir, 'data/classifiers/bernoulli_nb_classifier_5k.pickle')
    with open(clf_path, 'rb') as in_file:
        clf = pickle.load(in_file)
    return clf