示例#1
0
def lemmatize(serie):
    """
    Lowercase each post, pass it through ``remove_hyperlink`` and
    collect the ``lemma_`` of every token of the result.

    A 'fr_core_news_sm' spaCy pipeline is built on every call, with a
    ``POSTagger`` added after the parser and a ``LefffLemmatizer``
    added after that tagger.

    NOTE(review): the lemmas are read from spaCy's ``lemma_``; the
    spacy_lefff README exposes its result as ``token._.lefff_lemma``.
    Confirm which of the two is intended here.

    Parameters
    ----------
        serie : pandas.Series
            The column that is processed (each entry must support
            ``.lower()``)

    Returns
    -------
        lemmatized : pandas.Series
            The lemmatized column, one list of lemmas per entry
    """
    tagger = POSTagger()
    lefff = LefffLemmatizer(after_melt=True)

    pipeline = spacy.load('fr_core_news_sm')
    pipeline.add_pipe(tagger, name='pos', after='parser')
    pipeline.add_pipe(lefff, name='lefff', after='pos')

    def token_lemmas(post):
        return [token.lemma_ for token in pipeline(post)]

    lowered = serie.map(lambda post: post.lower())
    without_links = lowered.map(remove_hyperlink)
    return without_links.map(token_lemmas)
示例#2
0
    def _get_lang(self):
        """Set the page language from the `<html lang='en' >` attribute.

        The first two characters of the attribute are stored in
        ``self.lang`` and used to load the spaCy pipeline kept in
        ``self.nlp``. For 'fr' a ``LefffLemmatizer`` is added to that
        pipeline under the name 'lefff'.
        """
        html_tag = self.soup.find('html')
        lang_code = html_tag['lang'][:2]
        self.lang = lang_code

        self.nlp = spacy.load(lang_code)

        if lang_code != 'fr':
            return
        # Only the French pipeline gets the extra Lefff component.
        self.nlp.add_pipe(LefffLemmatizer(), name='lefff')
示例#3
0
def lemmatization(x):
    """Return the ``lemma_`` of every token of *x*, joined by single spaces.

    A 'fr_core_news_md' spaCy pipeline with a ``LefffLemmatizer`` added
    under the name 'lefff' is built on every call.

    NOTE(review): the lemmas are read from spaCy's ``lemma_``; the
    spacy_lefff README exposes its result as ``token._.lefff_lemma``.
    Confirm which of the two is intended here.
    """
    # spacy_lefff is the installed package used to lemmatize French
    # (the original author notes it is not available in NLTK).
    import spacy
    from spacy_lefff import LefffLemmatizer, POSTagger

    # The language model has to be installed first, e.g.:
    #   python -m spacy download fr
    pipeline = spacy.load('fr_core_news_md')
    pipeline.add_pipe(LefffLemmatizer(), name='lefff')
    return " ".join(token.lemma_ for token in pipeline(x))
def lemmatize(serie):
    """Map every entry of *serie* to the list of its tokens' ``lemma_``.

    A 'fr_core_news_sm' spaCy pipeline is built on every call, with a
    ``POSTagger`` added after the parser and a ``LefffLemmatizer``
    added after that tagger.

    NOTE(review): the lemmas are read from spaCy's ``lemma_``; the
    spacy_lefff README exposes its result as ``token._.lefff_lemma``.
    Confirm which of the two is intended here.
    """
    tagger = POSTagger()
    lefff = LefffLemmatizer(after_melt=True)

    pipeline = spacy.load('fr_core_news_sm')
    pipeline.add_pipe(tagger, name='pos', after='parser')
    pipeline.add_pipe(lefff, name='lefff', after='pos')

    def token_lemmas(text):
        return [token.lemma_ for token in pipeline(text)]

    return serie.map(token_lemmas)
示例#5
0
import re
from urllib.parse import urlparse
from bs4 import BeautifulSoup
from spacy_lefff import LefffLemmatizer, POSTagger
from collections import Counter
from nltk.tokenize import word_tokenize
from nltk import pos_tag

from sklearn.feature_extraction.text import TfidfVectorizer

import spacy
import pprint

####### CONSTANTS ##############
# Module-level French spaCy pipeline, built once when this module is
# executed. The LefffLemmatizer component is inserted under the name
# 'lefff', positioned before the "ner" component.
nlp_french = spacy.load('fr')
french_lemmatizer = LefffLemmatizer()
nlp_french.add_pipe(french_lemmatizer, name='lefff', before="ner")


class Site:
    """A website described by the URL it was entered through.

    Original note (translated): a site holds keywords, internal URLs,
    external URLs, the domain name and a document matrix.
    NOTE(review): only the URL parts and the home page are set in this
    class as shown; the other items are presumably handled elsewhere.
    """

    def __init__(self, url):
        """Split *url* into its parts and build the page for it."""
        parsed = urlparse(url)
        self.root_url = parsed.netloc
        self.entry_point = url
        self.site_url = parsed.scheme + "://" + parsed.netloc
        self.home_page = self.factory_page(url)

    def factory_page(self, page_url):
        """Factory method: build a ``Page`` for *page_url* on this site."""
        return Page(page_url, self.root_url, self.site_url)
app = Flask(__name__)


def _load_pickled_model(path):
    """Unpickle and return the object stored at *path*, closing the file."""
    # NOTE(review): unpickling can execute arbitrary code; these files
    # must come from a trusted source.
    with open(path, 'rb') as model_file:
        return pickle.load(model_file)


# Pickled models, one per language, read once at start-up.
model_fr = _load_pickled_model('model_fr.pkl')
model_en = _load_pickled_model('model_en.pkl')
class_review = ["neutral", "positive", "negative"]
sws_fr = stopwords.words('french')  # French stopwords
sws_en = stopwords.words('english')  # English stopwords
# Extra English entries appended to the stopword list.
list_sw_en_more = ["n't", "not", "no"]
sws_en = sws_en + list_sw_en_more
FrenchStemmer = SnowballStemmer("french")  # French stemming
porter = PorterStemmer()  # English stemming

WNlemmatizer = WordNetLemmatizer()  # English lemmatization
nlp = spacy.load("fr_core_news_sm")  # French lemmatization
pos = POSTagger()
french_lemmatizer = LefffLemmatizer(after_melt=True)
# The tagger goes after the parser and the Lefff lemmatizer after the tagger.
nlp.add_pipe(pos, name='pos', after='parser')
nlp.add_pipe(french_lemmatizer, name='lefff', after='pos')


@app.route('/')
def home():
    """Render the 'home.html' template with ``name`` set to "nao"."""
    return render_template('home.html', name="nao")


@app.route('/test', methods=['POST'])
def test():
    """Handle POST requests to '/test' by reading the 'review' form field.

    NOTE(review): nothing is returned in the lines visible here; the
    body looks truncated in this excerpt -- confirm against the full
    source before relying on it.
    """
    result = request.form
    # Raw text of the submitted review.
    r = result['review']
    #prediction = "positive"
示例#7
0
def nlp():
    """Return the spaCy 'fr' pipeline with a LefffLemmatizer added after the parser."""
    pipeline = spacy.load('fr')
    pipeline.add_pipe(LefffLemmatizer(), after='parser')
    return pipeline
示例#8
0
def add_lefff_lemma_nlp(nlp_pos):
    """Add a ``LefffLemmatizer(after_melt=True)`` to *nlp_pos* and return it.

    The component is placed after the pipe named 'POSTagger'; *nlp_pos*
    is modified in place and the same object is returned.
    """
    lefff = LefffLemmatizer(after_melt=True)
    nlp_pos.add_pipe(lefff, after='POSTagger')
    return nlp_pos
示例#9
0
def create_french_lemmatizer(nlp, name):
    """Build a ``LefffLemmatizer`` with ``after_melt=True`` and ``default=True``.

    *nlp* and *name* are accepted but not used.
    NOTE(review): the signature looks like a spaCy component factory --
    confirm against where this is registered.
    """
    lemmatizer = LefffLemmatizer(after_melt=True, default=True)
    return lemmatizer
示例#10
0
def test_lemmatizer_default():
    """With ``default=True``, lemmatizing "Apple" as a NOUN gives "apple"."""
    lemmatizer = LefffLemmatizer(default=True)
    lemma = lemmatizer.lemmatize(u"Apple", u"NOUN")
    assert lemma == u"apple"
示例#11
0
def test_lemmatizer_exception():
    """An unknown word with an unknown tag lemmatizes to ``None``."""
    lemmatizer = LefffLemmatizer()
    lemma = lemmatizer.lemmatize(u"unknow34", u"unknown")
    assert lemma is None
示例#12
0
 def __init__(self):
     """Load the spaCy 'fr' pipeline and add a LefffLemmatizer named 'lefff'.

     The pipeline is kept in ``self.nlp`` and the lemmatizer component
     in ``self.lemmatizer``.
     """
     pipeline = spacy.load('fr')
     self.nlp = pipeline
     lefff = LefffLemmatizer()
     self.lemmatizer = lefff
     pipeline.add_pipe(lefff, name='lefff')
示例#13
0
 def create_french_lemmatizer(nlp, name):
     """Build a ``LefffLemmatizer`` with its default settings.

     *nlp* and *name* are accepted but not used.
     NOTE(review): the signature looks like a spaCy component factory --
     confirm against where this is registered.
     """
     lemmatizer = LefffLemmatizer()
     return lemmatizer