Exemplo n.º 1
0
import numpy as np
import matplotlib.pyplot as plt
import sklearn.cross_validation as cv
from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import LogisticRegression
from sklearn.svm import LinearSVC, SVC
from sklearn.metrics import accuracy_score, f1_score
from sklearn.metrics import precision_score, recall_score
from sklearn.grid_search import GridSearchCV
from sklearn.feature_extraction.text import TfidfTransformer
import cPickle as pickle
from sklearn.decomposition import TruncatedSVD, PCA, KernelPCA
from sklearn.feature_selection import SelectKBest, chi2

# Set up the features.
# FeatureExtract is defined outside this snippet; only its `labels`, `works`,
# `feature_cp` and `_vectorize` members are used here.
extractor = FeatureExtract()
labels = extractor.labels
works = extractor.works

# Keep only the first element of `feature_cp` (presumably the feature matrix,
# with the feature names in a later element -- TODO confirm against
# FeatureExtract).
feature1 = extractor.feature_cp[0]

# Build one vectorized feature set per shell length from the pickled per-work
# data stored under ../shell/length<N>_no_mirror/<work>.pkl.
feature_shell = []
length = [2, 3]
for l in length:
    folder = '../shell/length%d_no_mirror' % l
    dict_list = []
    for work in works:
        # NOTE(security): unpickling can execute arbitrary code, so these
        # files must come from a trusted source.
        # Open through a context manager so each handle is closed as soon as
        # it has been read, instead of leaking one descriptor per work.
        with open('%s/%s.pkl' % (folder, work), 'rb') as pkl_file:
            data = pickle.load(pkl_file)
        dict_list.append(data)
    # `_vectorize` returns a pair; only the first element is collected here.
    # `names` is not used in the visible part of the script.
    feature, names = extractor._vectorize(dict_list)
    feature_shell.append(feature)
Exemplo n.º 2
0
sys.path.append('../')
from jos_learn.features import FeatureExtract

import numpy as np
import matplotlib.pyplot as plt
import sklearn.cross_validation as cv
from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import LogisticRegression
from sklearn.svm import LinearSVC
from sklearn.metrics import accuracy_score, f1_score
from sklearn.metrics import precision_score, recall_score
from sklearn.grid_search import GridSearchCV
from sklearn.feature_extraction.text import TfidfTransformer

# Set up the features.
# FeatureExtract comes from jos_learn.features (imported above).
extractor = FeatureExtract()
labels = extractor.labels
# Each attribute unpacks into exactly two values; judging by the names these
# are (feature matrix, feature names) -- TODO confirm against FeatureExtract.
feature1, feature_name1 = extractor.feature_cp
feature2, feature_name2 = extractor.feature_ps_ql_pair

# TF-IDF-normalize each feature set. The same transformer object is reused:
# fit_transform() refits it on every call, so the second call does not depend
# on the first. `.toarray()` assumes the result is a scipy sparse matrix and
# converts it to a dense array.
normalizer = TfidfTransformer()
feature1_norm = normalizer.fit_transform(feature1).toarray()
feature2_norm = normalizer.fit_transform(feature2).toarray()

# Set aside the samples whose label equals 2 (called "unsecure" here), in both
# raw and normalized form. This must run before feature1/feature2 are
# overwritten below. Assumes `labels` is a NumPy array so that `labels == 2`
# yields a boolean mask -- TODO confirm.
feature1_unsecure = feature1[labels == 2]
feature2_unsecure = feature2[labels == 2]
feature1_unsecure_norm = feature1_norm[labels == 2]
feature2_unsecure_norm = feature2_norm[labels == 2]

# Keep only the samples whose label is not 2 in the raw feature sets.
feature1 = feature1[labels != 2]
feature2 = feature2[labels != 2]