Python Loader示例

编程语言: Python

命名空间/包名称: utils.utils

类/类型: Loader

hotexamples.com的示例: 6

Python Loader - 共找到 6 个示例。以下是从开源项目中提取的、评分最高的 utils.utils.Loader 真实 Python 代码示例。您可以为这些示例评分，帮助我们提高示例质量。

常用方法

显示隐藏

load_movies(3)

load_pres(3)

load_movies_test(1)

示例#1

显示文件

            if x == -1 or x == 0:
                f.write(str(classes[0]))
            else:
                f.write(str(classes[1]))
            f.write("\n")
    return res


"""      
X_train,Y_train = Loader.load_pres(fname)
X_test, _ = Loader.load_pres(tname)

result = predict(X_train, Y_train, X_test, save = "auteurs.txt", classes = ["M","C"], post_processing=True)

fig,ax = plt.subplots(figsize=(35,100)) 
ax.imshow(result.reshape(54,-1),interpolation="nearest")
"""
# plt.tight_layout()

# Second task: load the movie data. load_movies returns a pair (unpacked here
# as inputs and labels), while load_movies_test returns a single value.
# fname_2 and tname_2 are defined outside this excerpt.
X_train, Y_train = Loader.load_movies(fname_2)
X_test = Loader.load_movies_test(tname_2)

# Run predict on the test inputs. predict and params_sentiments are defined
# outside this excerpt.
# NOTE(review): save="sentiments.txt" presumably names the file the predicted
# labels are written to, and classes gives the two labels to emit — confirm
# against the definition of predict.
result_sent = predict(X_train,
                      Y_train,
                      X_test,
                      params=params_sentiments,
                      save="sentiments.txt",
                      classes=["-1", "1"],
                      post_processing=False,
                      equilibrage=False)

示例#2

显示文件

文件： test_solution.py 项目： LieceC/Projet-Rital-Luc-Dao

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn import linear_model as lin
from sklearn import svm
import sklearn.naive_bayes as nb

from wordcloud import WordCloud
from nltk.corpus import stopwords

import matplotlib.pyplot as plt
from time import time
import spacy
import numpy as np
import pickle
# Path of the training corpus handed to Loader.load_pres.
# NOTE(review): Loader is not imported in the visible part of this excerpt.
fname = "Data/AFDpresidentutf8/corpus.tache1.learn.utf8"
# load_pres returns a pair, unpacked here as texts and labels.
alltxts, alllabs = Loader.load_pres(fname)

params = {
    "lowercase": [False, True],
    "punct": [False, True],
    "marker": [False, True],
    "number": [False, True],
    "stemming": [False, Preprocessing.stem],
    "ligne": [None, -2, 0],
    "strip_accents": [False, True],
    "stopwords": [None, stop],  # set(STOPWORDS)],
    "Vectorizer": [CountVectorizer, TfidfVectorizer],
    "binary": [True, False],
    "class_weight": ["balanced", None],
    "max_features": [None, 10000, 7000],
    "ngram_range": [(1, 1), (1, 2)],

示例#3

显示文件

文件： vocabulary_extraction.py 项目： MrJuin/Projet-Rital-Luc-Dao

# -*- coding: utf-8 -*-
from utils.utils import Loader
from utils.preprocessing import Preprocessing
from sklearn.feature_extraction.text import CountVectorizer
import numpy as np
from wordcloud import WordCloud
import matplotlib.pyplot as plt
from utils.oddsRatio import OddsRatioCloud
from time import time
import spacy
from nltk.corpus import stopwords

# Path of the training corpus handed to Loader.load_pres.
fname = "Data/AFDpresidentutf8/corpus.tache1.learn.utf8"
train_x, train_y = Loader.load_pres(fname)

# French stop words from NLTK; can be extended with e.g. ['cet', 'cette', 'là'].
stop = list(stopwords.words('french'))

# Options forwarded to Preprocessing.preprocessing for every document.
params = {
    "lowercase": False,
    "punct": False,
    "marker": False,
    "number": False,
    "stemming": Preprocessing.lem,  # alternative: Preprocessing.stem
    "ligne": None,
    "strip_accents": False,
    "stopwords": stop,  # alternative: set(stop)
}


def f(x):
    """Preprocess one document *x* using the module-level ``params``.

    Defined with ``def`` rather than as a lambda bound to a name (PEP 8),
    which gives it a real name in tracebacks and lets it be pickled by
    reference together with the vectorizer.
    """
    return Preprocessing.preprocessing(x, params)

#%%


# lowercase=False keeps CountVectorizer from lowercasing on its own; text
# normalisation is delegated to the custom preprocessor f.
vectorizer = CountVectorizer(
    preprocessor=f,
    lowercase=False,
    token_pattern=Preprocessing.token_pattern,
)

示例#4

显示文件

from utils.utils import Loader

# Load the "learn" corpus; load_pres returns a pair, unpacked as texts and labels.
fname = "Data/AFDpresidentutf8/corpus.tache1.learn.utf8"
alltxts,alllabs = Loader.load_pres(fname)


# fname is rebound to the "test" corpus and loaded with the same loader.
fname = "Data/AFDpresidentutf8/corpus.tache1.test.utf8"
alltxts_test,alllabs_test = Loader.load_pres(fname)

'''
print(len(alltxts),len(alllabs))
print(alltxts[0])
print(alllabs[0])
print(alltxts[-1])
print(alllabs[-1])

path = "Data/AFDmovies/movies1000/"
alltxts,alllabs = Loader.load_movies(path)
'''

示例#5

显示文件

文件： campaign.py 项目： MrJuin/Projet-Rital-Luc-Dao

from sklearn.linear_model import LogisticRegression

# English stop-word list from NLTK with the words below taken out, so that
# they are NOT treated as stop words (mostly negations and negated
# contractions — presumably kept because they carry sentiment; confirm).
stop = list(stopwords.words('english'))
stop = list(
    set(stop) - {
        # Fix: a comma was missing after "nor". Adjacent string literals are
        # concatenated, so the set contained "norain" and neither "nor" nor
        # "ain" was actually removed from the stop-word list.
        "no", "not", "nor",
        'ain', 'aren', "aren't", 'couldn', "couldn't", 'didn', "didn't",
        'doesn', "doesn't", 'hadn', "hadn't", 'hasn', "hasn't", 'haven',
        "haven't", 'isn', "isn't", 'ma', 'mightn', "mightn't", 'mustn',
        "mustn't", 'needn', "needn't", 'shan', "shan't", 'shouldn',
        "shouldn't", 'wasn', "wasn't", 'weren', "weren't", 'won', "won't",
        'wouldn', "wouldn't", 'don', "don't", 'should', "should've",
    })

# Directory handed to Loader.load_movies, which returns a pair unpacked here
# as texts and labels.
fname = "Data/AFDmovies/movies1000/"
alltxts, alllabs = Loader.load_movies(fname)
# Convert both to NumPy arrays.
alltxts = np.array(alltxts)
alllabs = np.array(alllabs)

params = {
    # lowercase":[False,True],
    "punct": [False, True],
    # "marker":[False,True],
    # "number":[False,True],
    "stemming": [False, Preprocessing.stem_eng],  #,Preprocessing.stem],
    "ligne": [None, -2, 0],
    # "strip_accents":[False,True], #
    "stopwords": [None, stop],  # set(STOPWORDS)],
    "Vectorizer": [CountVectorizer, TfidfVectorizer],
    # "binary": [False,True],
    # "class_weight": [[0.1,1]],# ["balanced"],

示例#6

显示文件

# -*- coding: utf-8 -*-
from utils.utils import Loader
from utils.preprocessing import Preprocessing
from sklearn.feature_extraction.text import CountVectorizer
import numpy as np
from wordcloud import WordCloud
import matplotlib.pyplot as plt
from utils.oddsRatio import OddsRatioCloud
from time import time
from nltk.corpus import stopwords
from utils.scoring import get_vectorizer

# Directory handed to Loader.load_movies, which returns a pair unpacked here
# as training inputs and labels.
fname = "Data/AFDmovies/movies1000/"
train_x, train_y = Loader.load_movies(fname)

# English stop-word list from NLTK with the words below taken out, so that
# they are NOT treated as stop words (mostly negations and negated
# contractions — presumably kept because they carry sentiment; confirm).
stop = list(stopwords.words('english'))
stop = list(
    set(stop) - {
        # Fix: a comma was missing after "nor". Adjacent string literals are
        # concatenated, so the set contained "norain" and neither "nor" nor
        # "ain" was actually removed from the stop-word list.
        "no", "not", "nor",
        'ain', 'aren', "aren't", 'couldn', "couldn't", 'didn', "didn't",
        'doesn', "doesn't", 'hadn', "hadn't", 'hasn', "hasn't", 'haven',
        "haven't", 'isn', "isn't", 'ma', 'mightn', "mightn't", 'mustn',
        "mustn't", 'needn', "needn't", 'shan', "shan't", 'shouldn',
        "shouldn't", 'wasn', "wasn't", 'weren', "weren't", 'won', "won't",
        'wouldn', "wouldn't", 'don', "don't", 'should', "should've",
    })

params = {
    "lowercase": False,
    "punct": False,
    # "marker":False,