예제 #1
0
from sklearn.linear_model import LogisticRegression

# Build the stop-word list: start from the English list returned by
# stopwords.words('english'), then take the words below (mostly negations and
# negative contractions) back out of it so they are no longer treated as
# stop words.
stop = list(stopwords.words('english'))
stop = list(
    set(stop) - {
        # The comma after "nor" is required: without it Python joins the
        # adjacent literals "nor" 'ain' into the single string "norain",
        # and neither "nor" nor "ain" is removed from the stop list.
        "no", "not", "nor",
        'ain', 'aren', "aren't", 'couldn', "couldn't", 'didn', "didn't",
        'doesn', "doesn't", 'hadn', "hadn't", 'hasn', "hasn't", 'haven',
        "haven't", 'isn', "isn't", 'ma', 'mightn', "mightn't", 'mustn',
        "mustn't", 'needn', "needn't", 'shan', "shan't", 'shouldn',
        "shouldn't", 'wasn', "wasn't", 'weren', "weren't", 'won', "won't",
        'wouldn', "wouldn't", 'don', "don't", 'should', "should've"
    })

# Path handed to the project loader for the movie-review data.
fname = "Data/AFDmovies/movies1000/"

# Load the (texts, labels) pair and wrap each element in a NumPy array
# as it is unpacked.
alltxts, alllabs = map(np.array, Loader.load_movies(fname))

params = {
    # "lowercase":[False,True],
    "punct": [False, True],
    # "marker":[False,True],
    # "number":[False,True],
    "stemming": [False, Preprocessing.stem_eng],  #,Preprocessing.stem],
    "ligne": [None, -2, 0],
    # "strip_accents":[False,True], #
    "stopwords": [None, stop],  # set(STOPWORDS)],
    "Vectorizer": [CountVectorizer, TfidfVectorizer],
    # "binary": [False,True],
    # "class_weight": [[0.1,1]],# ["balanced"],
예제 #2
0
            if x == -1 or x == 0:
                f.write(str(classes[0]))
            else:
                f.write(str(classes[1]))
            f.write("\n")
    return res


"""      
X_train,Y_train = Loader.load_pres(fname)
X_test, _ = Loader.load_pres(tname)

result = predict(X_train, Y_train, X_test, save = "auteurs.txt", classes = ["M","C"], post_processing=True)

fig,ax = plt.subplots(figsize=(35,100)) 
ax.imshow(result.reshape(54,-1),interpolation="nearest")
"""
# plt.tight_layout()

# Sentiment run: load_movies yields the training pair, load_movies_test the
# test inputs only.
X_train, Y_train = Loader.load_movies(fname_2)
X_test = Loader.load_movies_test(tname_2)

# Run predict() with the sentiment-specific parameters; save and classes are
# forwarded unchanged, post-processing and balancing are switched off.
result_sent = predict(
    X_train,
    Y_train,
    X_test,
    params=params_sentiments,
    save="sentiments.txt",
    classes=["-1", "1"],
    post_processing=False,
    equilibrage=False,
)
예제 #3
0
# -*- coding: utf-8 -*-
from utils.utils import Loader
from utils.preprocessing import Preprocessing
from sklearn.feature_extraction.text import CountVectorizer
import numpy as np
from wordcloud import WordCloud
import matplotlib.pyplot as plt
from utils.oddsRatio import OddsRatioCloud
from time import time
from nltk.corpus import stopwords
from utils.scoring import get_vectorizer

# Path handed to the project loader for the movie-review data.
fname = "Data/AFDmovies/movies1000/"
train_x, train_y = Loader.load_movies(fname)

# Build the stop-word list: start from NLTK's English stop words, then take
# the words below (mostly negations and negative contractions) back out of it
# so they are no longer treated as stop words.
stop = list(stopwords.words('english'))
stop = list(
    set(stop) - {
        # The comma after "nor" is required: without it Python joins the
        # adjacent literals "nor" 'ain' into the single string "norain",
        # and neither "nor" nor "ain" is removed from the stop list.
        "no", "not", "nor",
        'ain', 'aren', "aren't", 'couldn', "couldn't", 'didn', "didn't",
        'doesn', "doesn't", 'hadn', "hadn't", 'hasn', "hasn't", 'haven',
        "haven't", 'isn', "isn't", 'ma', 'mightn', "mightn't", 'mustn',
        "mustn't", 'needn', "needn't", 'shan', "shan't", 'shouldn',
        "shouldn't", 'wasn', "wasn't", 'weren', "weren't", 'won', "won't",
        'wouldn', "wouldn't", 'don', "don't", 'should', "should've"
    })

params = {
    "lowercase": False,
    "punct": False,
    # "marker":False,