def lemmatize(serie):
    """
    Lemmatize every post of a pandas series with a French spaCy pipeline.

    Each post is lower-cased, passed through ``remove_hyperlink`` and then
    run through the ``fr_core_news_sm`` pipeline, to which a ``POSTagger``
    and a ``LefffLemmatizer`` component have been added.

    Parameters
    ----------
    serie : pandas.Series
        The column to process; every element must support ``.lower()``.

    Returns
    -------
    lemmatized : pandas.Series
        For each post, the list of ``lemma_`` values of its tokens.
    """
    tagger = POSTagger()
    lefff = LefffLemmatizer(after_melt=True)

    # The lemmatizer is registered after the tagger, which itself runs
    # after the parser.
    nlp = spacy.load('fr_core_news_sm')
    nlp.add_pipe(tagger, name='pos', after='parser')
    nlp.add_pipe(lefff, name='lefff', after='pos')

    def to_lower(post):
        return post.lower()

    def to_lemmas(post):
        # NOTE(review): this reads ``lemma_``; confirm whether the
        # lefff component exposes its result there or on a custom
        # extension attribute.
        return [token.lemma_ for token in nlp(post)]

    lowered = serie.map(to_lower)
    without_links = lowered.map(remove_hyperlink)
    lemmatized = without_links.map(to_lemmas)
    return lemmatized
def lemmatize(serie):
    """
    Lemmatize every element of a pandas series with a French spaCy pipeline.

    The ``fr_core_news_sm`` pipeline is extended with a ``POSTagger``
    (after the parser) and a ``LefffLemmatizer`` (after the tagger).

    Parameters
    ----------
    serie : pandas.Series
        The column to process; each element is passed to the pipeline as is.

    Returns
    -------
    lemmatized : pandas.Series
        For each element, the list of ``lemma_`` values of its tokens.
    """
    tagger = POSTagger()
    lefff = LefffLemmatizer(after_melt=True)

    nlp = spacy.load('fr_core_news_sm')
    nlp.add_pipe(tagger, name='pos', after='parser')
    nlp.add_pipe(lefff, name='lefff', after='pos')

    def to_lemmas(text):
        return [token.lemma_ for token in nlp(text)]

    return serie.map(to_lemmas)
from textblob import TextBlob


def _load_pickle(path):
    """Load a pickled object from *path*, closing the file afterwards."""
    # SECURITY: unpickling executes arbitrary code; only load model files
    # that come from a trusted source.
    with open(path, 'rb') as handle:
        return pickle.load(handle)


app = Flask(__name__)

# Pickled models, one per language.
model_fr = _load_pickle('model_fr.pkl')
model_en = _load_pickle('model_en.pkl')

class_review = ["neutral", "positive", "negative"]

sws_fr = stopwords.words('french')   # French stop words
sws_en = stopwords.words('english')  # English stop words
# Extra negation words appended to the English stop-word list.
list_sw_en_more = ["n't", "not", "no"]
sws_en = sws_en + list_sw_en_more

FrenchStemmer = SnowballStemmer("french")  # French stemmer
porter = PorterStemmer()                   # English stemmer
WNlemmatizer = WordNetLemmatizer()         # English lemmatizer

# spaCy pipeline used for French lemmatization: the POS tagger runs after
# the parser and the Lefff lemmatizer runs after the tagger.
nlp = spacy.load("fr_core_news_sm")
pos = POSTagger()
french_lemmatizer = LefffLemmatizer(after_melt=True)
nlp.add_pipe(pos, name='pos', after='parser')
nlp.add_pipe(french_lemmatizer, name='lefff', after='pos')


@app.route('/')
def home():
    """Render ``home.html`` with a fixed ``name`` value."""
    name = "nao"
    return render_template('home.html', name=name)


@app.route('/test', methods=['POST'])
def test():
    """Read the ``review`` field from the submitted form."""
    result = request.form
    # NOTE(review): nothing is returned after this point in the visible
    # code; a Flask view is expected to return a response - confirm the
    # rest of the handler.
    r = result['review']
def nlp_pos():
    """Build the ``'fr'`` spaCy pipeline with a POS tagger added.

    Returns the pipeline loaded by ``spacy.load('fr')`` after a new
    ``POSTagger`` has been registered under the name ``'POSTagger'``,
    positioned after the parser.
    """
    pipeline = spacy.load('fr')
    pipeline.add_pipe(POSTagger(), name='POSTagger', after='parser')
    return pipeline
def test_load_tag(model_dir):
    """Check that ``tag_dict`` is unchanged after loading ``tag_dict.json``."""
    tagger = POSTagger()
    expected = tagger.tag_dict

    # NOTE(review): the tag dictionary file is loaded through
    # ``load_lexicon``; if that method does not touch ``tag_dict`` this
    # assertion is vacuous - confirm whether a tag-specific loader was
    # intended.
    tagger.load_lexicon(os.path.join(model_dir, 'tag_dict.json'))

    assert tagger.tag_dict == expected
def test_load_lexicon(model_dir):
    """Check that ``lex_dict`` is unchanged after reloading ``lexicon.json``."""
    tagger = POSTagger()
    expected = tagger.lex_dict

    # Reload the lexicon from the given model directory and compare it
    # with the one the tagger held right after construction.
    tagger.load_lexicon(os.path.join(model_dir, 'lexicon.json'))

    assert tagger.lex_dict == expected
def create_melt_tagger(nlp, name):
    """Return a new ``POSTagger`` built with its default arguments.

    ``nlp`` and ``name`` are accepted but not used by this function
    (presumably required by a component-factory signature - confirm
    against the caller).
    """
    tagger = POSTagger()
    return tagger
def test_load_tag():
    """Check that ``tag_dict`` is unchanged after loading ``tag_dict.json``."""
    tagger = POSTagger()
    expected = tagger.tag_dict

    # NOTE(review): the tag dictionary file is loaded through
    # ``load_lexicon``; if that method does not touch ``tag_dict`` this
    # assertion is vacuous - confirm whether a tag-specific loader was
    # intended.
    tagger.load_lexicon(os.path.join(MODELS_DIR, 'tag_dict.json'))

    assert tagger.tag_dict == expected
def test_load_lexicon():
    """Check that ``lex_dict`` is unchanged after reloading ``lexicon.json``."""
    tagger = POSTagger()
    expected = tagger.lex_dict

    # Reload the lexicon from MODELS_DIR and compare it with the one the
    # tagger held right after construction.
    tagger.load_lexicon(os.path.join(MODELS_DIR, 'lexicon.json'))

    assert tagger.lex_dict == expected