Пример #1
0
import utils.utils as u
import utils.stats as stat
import utils.preprocessor_methods as pr

import storage.data as d

from numpy import *
from pylab import *


from sklearn.metrics import precision_recall_fscore_support

# `logging` is imported explicitly: the script otherwise only gets the name
# (if at all) as a by-product of the star imports above, which depends on
# the installed library versions.
import logging

logging.basicConfig(level=logging.DEBUG, format='%(asctime)s %(message)s')

# set_file_names() is called before get_data(); get_data() returns eight
# values, unpacked here one pair (documents, labels) per line.
d.set_file_names()
(docs_test, y_test,
 docs_train, y_train,
 docs_train_subjectivity, y_train_subjectivity,
 docs_train_polarity, y_train_polarity) = d.get_data()

"""
MaxEnt:
{
    'vect__ngram_range': (1, 1), 
    'vect__smooth_idf': True, 
    'vect__max_df': 0.5, 
    'vect__sublinear_tf': True, 
    'vect__preprocessor': <function placeholders at 0x9be31b4>, 
    'clf__penalty': 'l1', 
    'clf__C': 1.0, 
    'vect__use_idf': True
}
"""
c1_vect_options = {
Пример #2
0
import matplotlib

matplotlib.use('Agg')
import matplotlib.pyplot as plt

import utils.utils as u
import utils.stats as s

import storage.data as d
from pylab import arange, savefig

# Fix: this script calls logging.basicConfig() but never imported `logging`,
# so it failed with NameError before doing any work.
import logging

logging.basicConfig(level=logging.DEBUG, format='%(asctime)s %(message)s')

# set_file_names() is called before get_data(); get_data() returns eight
# values, unpacked here one pair (documents, labels) per line.
d.set_file_names()
(docs_test, y_test,
 docs_train, y_train,
 docs_train_subjectivity, y_train_subjectivity,
 docs_train_polarity, y_train_polarity) = d.get_data()

# labels = ['NB', 'SVM', 'MaxEnt', 'Combined (Foo -> Bar)', 'Combined (Foo -> Bar)', 'Combined (Foo -> Bar)', 'Combined (Foo -> Bar)', 'Combined (Foo -> Bar)', 'Combined (Foo -> Bar)', 'Boosting']
# values = [0.549392, 0.34543453, 0.23432999, 0.654213, 0.3213495, 0.65343, 0.53999211, 0.34543453, 0.23432999, 0.654213]

# Module-level accumulators; test() declares both as globals and appends the
# classifier's string form to `labels`.
labels, values = [], []


def test(clf):
    global labels, values

    y_predict = clf.predict(docs_test)
    score = u.score(y_test, y_predict)

    labels.append(str(clf))
Пример #3
0
import sys
import logging

from models import *

import storage.data as d
import storage.prediction_exporter as pe

import utils.stats as s
import utils.preprocessor_methods as pr


# Emit timestamped log lines at DEBUG level.
logging.basicConfig(format="%(asctime)s %(message)s", level=logging.DEBUG)

# d.get_data() returns eight values; unpack them one name per line.
(
    docs_test,
    y_test,
    docs_train,
    y_train,
    docs_train_subjectivity,
    y_train_subjectivity,
    docs_train_polarity,
    y_train_polarity,
) = d.get_data()

# Pass the full test set from the storage module to the prediction exporter.
pe.set_base_from_dataset(d.get_full_test_set())

# Options passed to the SVM wrapper as `vect_options`.
vect_options = dict(
    ngram_range=(1, 1),
    sublinear_tf=True,
    preprocessor=pr.remove_noise,
    use_idf=False,
    stop_words=None,
)

# Options passed to the SVM wrapper as `default_options`.
default_options = dict(C=1.0)

# Construct the classifier from the training documents and their labels.
clf = SVM(
    docs_train,
    y_train,
    default_options=default_options,
    vect_options=vect_options,
)
if len(sys.argv) > 1: