Exemplo n.º 1
0
            print 'Downsample favor: ' + str(downsample_rate_favor)
            print 'Downsample none: ' + str(downsample_rate_none)
            train_data = ptd.getTrainingData()
            validate_data = ptd.getValidationData()
            #test_data = ptd.getTestData()
            sub_none = ptd.getDownsample2_0(train_data, "NONE", strength, downsample_rate_none)
            sub_favor = ptd.getDownsample2_0(train_data, "FAVOR", strength, downsample_rate_favor)
            against = train_data[train_data.Stance == "AGAINST"]

            train_data = pd.concat([sub_favor, sub_none, against])

        else:
            print("using nothing")
            train_data = ptd.getTrainingData()
            validate_data = ptd.getValidationData()
            test_data = ptd.getTestData()

        # Optional upsampling: append a second copy of every AGAINST row to
        # the training frame.
        if use_upsample:
            print("using up sampling")
            against_rows = train_data[train_data.Stance == "AGAINST"]
            train_data = pd.concat([train_data, against_rows])

        # Stratified 10-fold split over the Stance labels, shuffled with a
        # fixed seed so fold assignment is repeatable between runs.
        # NOTE(review): when upsampling is on, the duplicated AGAINST rows are
        # split along with everything else, so a row and its copy may end up
        # in different folds.
        cv = StratifiedKFold(
            train_data.Stance,
            n_folds=10,
            shuffle=True,
            random_state=1
        )

        # Select classifiers to use; the commented-out entries are
        # alternatives that are currently disabled.
        classifiers = [
            SVC(C=6.9183097091893631, kernel='linear', shrinking=True)
            #LinearSVC(C=1.178),
            #MultinomialNB(alpha=0.1, fit_prior=False)
            #LogisticRegression()
        ]
Exemplo n.º 2
0
from sklearn.metrics import classification_report
from sklearn.cross_validation import cross_val_predict, StratifiedKFold
from sklearn.metrics import fbeta_score
from sklearn.svm import LinearSVC, SVC
from sklearn.linear_model import LogisticRegression, SGDClassifier
from sklearn.naive_bayes import MultinomialNB, BernoulliNB

import pandas as pd
import sklearn

strength = 'soft'

# ----- Load data -----
# The three splits come from the project's `ptd` helper module.
train_data = ptd.getTrainingData()
validate_data = ptd.getValidationData()
test_data = ptd.getTestData()

# Stratified 10-fold split over the training Stance labels, shuffled with a
# fixed seed so fold assignment is repeatable between runs.
# NOTE(review): this is the legacy sklearn.cross_validation signature (labels
# passed to the constructor, `n_folds`); sklearn.model_selection uses
# `n_splits` and takes the labels in split() instead.
cv = StratifiedKFold(train_data.Stance, n_folds=10, shuffle=True, random_state=1)

# Classifiers to evaluate; the commented-out entries are alternatives that are
# currently disabled.
classifiers = [
    LinearSVC(multi_class='crammer_singer', C=2.3988329190194899),
    #SVC(C=5.2, kernel='linear')
    #MultinomialNB(alpha=0.63, fit_prior=True)
    #LogisticRegression(C=22.759, penalty='l2', solver='lbfgs')
    #SGDClassifier(alpha=0.0001, loss='squared_hinge')
    #BernoulliNB(alpha=0.1, fit_prior=True)
]
Exemplo n.º 3
0
import json

# ----- Parameters -----

# NOTE(review): integer flag, not read in the visible part of this script;
# presumably toggles writing results to disk -- confirm where it is used.
store_to_file = 0


# ----- Load data -----

print("Loading data...")
# All three labelled splits (training, validation, test) are merged into one
# training frame; the unlabelled data is loaded separately.
train_data = pd.concat([
    ptd.getTrainingData(),
    ptd.getValidationData(),
    ptd.getTestData()
])
unlabelled_data = ptd.getUnlabelledData()


# ----- Train classifier -----

print("Training classifier")
best_classifier = LinearSVC(C=1.178)

pipeline = Pipeline([('vect', CountVectorizer(decode_error='ignore',
                                              analyzer='word',
                                              ngram_range=(1, 2),
                                              stop_words= None,