def Insert_Donnees_AVI():
    """Load the curated ``AVI.txt`` export and insert one row per review into AVIS.

    Reads the semicolon-separated file (skipping the header line), scores the
    review title and the comment with the French TextBlob sentiment analyzer,
    and inserts the values with a parameterized query.

    Returns:
        True on success.
    """
    con = psycopg2.connect(database="BASE_CURATED_ZONE", user="******",
                           password="******", host="127.0.0.1", port="5433")
    try:
        cur = con.cursor()
        file_path = myPathRoot_CURRATEDZONE + "AVI.txt"
        # Context manager guarantees the handle is closed even if parsing fails
        # (the original leaked it on any exception).
        with open(file_path, "r", encoding="utf-8", errors="ignore") as fp:
            contents = fp.readlines()
        del contents[0]  # drop the header row
        for raw_line in contents:
            line = raw_line.split(";")
            cle_unique = int(line[0])
            emplacement_source = line[1]
            datetime_ingestion = line[2]
            privacy_level = line[3]
            entreprise = line[4]
            # 'NULL' dates are replaced by a fixed sentinel date.
            date = 'May 24, 2020' if line[5] == 'NULL' else line[5]
            review_titre = TextBlob(line[6], pos_tagger=PatternTagger(),
                                    analyzer=PatternAnalyzer()).sentiment[0]
            status_employe = line[7]
            lieu = line[8]
            recommande = line[9]
            commentaire = TextBlob(line[10], pos_tagger=PatternTagger(),
                                   analyzer=PatternAnalyzer()).sentiment[0]
            avantage = line[11].lower()
            incovenient = line[12].lower()
            cur.execute(
                "INSERT INTO AVIS VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)",
                (cle_unique, emplacement_source, datetime_ingestion, privacy_level,
                 entreprise, date, review_titre, status_employe, lieu, recommande,
                 commentaire, avantage, incovenient))
        con.commit()
    finally:
        # Connection released even if an insert raises (the original leaked it).
        con.close()
    return True
def sentiment_analysis_basic(tweets, lang):
    """Classify each tweet as positive/neutral/negative and print percentages.

    Args:
        tweets: iterable of tweet texts.
        lang: 'english' uses TextBlob's default analyzer; anything else is
            treated as French (textblob-fr pattern analyzer).
    """
    positive_tweets = 0
    neutral_tweets = 0
    negative_tweets = 0
    for tweet in tweets:
        if lang == 'english':
            sentiment = TextBlob(tweet).sentiment.polarity
        else:  # french
            sentiment = TextBlob(tweet, pos_tagger=PatternTagger(),
                                 analyzer=PatternAnalyzer()).sentiment[0]
        if sentiment > 0:
            positive_tweets += 1
        elif sentiment == 0:
            neutral_tweets += 1
        else:
            negative_tweets += 1
    total_tweets_analysed = positive_tweets + neutral_tweets + negative_tweets
    # Guard against an empty input: the original divided by zero here.
    if total_tweets_analysed == 0:
        print("\nNo tweets to analyse")
        return
    positive_tweets_percentage = positive_tweets / total_tweets_analysed * 100
    neutral_tweets_percentage = neutral_tweets / total_tweets_analysed * 100
    print("\nNo. of positive tweets = {} Percentage = {}".format(
        positive_tweets, positive_tweets_percentage))
    print("No. of neutral tweets = {} Percentage = {}".format(
        neutral_tweets, neutral_tweets_percentage))
    # Negative percentage derived as the remainder, as in the original.
    print("No. of negative tweets = {} Percentage = {}".format(
        negative_tweets,
        100 - (positive_tweets_percentage + neutral_tweets_percentage)))
def match_syntagm_text_blob_multi(syntagms, text):
    """Tokenize *text* with the French TextBlob pipeline and match *syntagms* against it."""
    from textblob import TextBlob
    from textblob_fr import PatternTagger, PatternAnalyzer
    tokens = list(TextBlob(text, pos_tagger=PatternTagger(),
                           analyzer=PatternAnalyzer()).tokenize())
    return match_sequences(syntagms, tokens)
def analyse(comments):
    """Score comments, choosing a sentiment analyzer by detected language.

    German text uses TextBlobDE, French the textblob-fr pattern analyzer, and
    everything else (including detection failures) falls back to the default
    English TextBlob analyzer.

    Returns:
        (allcomments, polarity): the comments kept and their polarity scores.
    """
    allcomments = []
    polarity = []
    for comment in comments:
        try:
            allcomments.append(comment)
            try:
                lang = detect(comment)
                if lang == 'de':
                    x = TextBlobDE(comment).sentiment.polarity
                elif lang == 'fr':
                    x = TextBlob(comment, pos_tagger=PatternTagger(),
                                 analyzer=PatternAnalyzer()).sentiment[0]
                else:
                    x = TextBlob(comment).sentiment.polarity
            except Exception:
                # Language detection can fail on short/ambiguous text:
                # best-effort fallback to the default analyzer (was a bare except).
                x = TextBlob(comment).sentiment.polarity
            polarity.append(x)
        except Exception:
            # Deliberate best-effort: skip comments that cannot be scored at all
            # (narrowed from the original bare except).
            pass
    return allcomments, polarity
def extract_keywords(txt):
    """Extract (word, POS-tag) pairs from a French text."""
    tagged = TextBlob(any2utf8(txt), pos_tagger=PatternTagger(),
                      analyzer=PatternAnalyzer()).tags
    return list(tagged)
def saveSentiments():
    """Opinion-mining average for one day; append the result to a CSV file.

    Reads today's input CSV, scores each row's text with the French analyzer,
    and appends [date, row count, mean polarity, mean subjectivity] to
    output/sentimentAnalysis.csv.

    BUG FIX: the original reused a single running total (named ``sum``,
    shadowing the builtin) for both averages without resetting it, so the
    subjectivity average wrongly included the sum of all polarities.
    """
    data = pd.read_csv("input/" + str(datetime.utcnow().date().today()) + ".csv")
    polarities = []
    subjectivities = []
    for i in range(len(data.index)):
        blob = TextBlob(cleanText(data.iloc[i, 1]),
                        pos_tagger=PatternTagger(),
                        analyzer=PatternAnalyzer())
        polarities.append(blob.sentiment[0])
        subjectivities.append(blob.sentiment[1])
    polarity_average = sum(polarities) / len(polarities)
    subjectivity_average = sum(subjectivities) / len(subjectivities)
    # Context manager so the output file is closed even if the write fails.
    with open("output/sentimentAnalysis.csv", "a", newline='',
              encoding='utf-8') as f:
        writer = csv.writer(f, delimiter=',')
        writer.writerow([
            str(datetime.utcnow().date().today()),
            len(data.index),
            polarity_average,
            subjectivity_average,
        ])
def extract_dictionary(txt):
    """Tokenize a French text into its list of words."""
    words = TextBlob(any2utf8(txt), pos_tagger=PatternTagger(),
                     analyzer=PatternAnalyzer()).words
    return list(words)
def get_polarity(topic):
    """Collect tweets about *topic* per candidate, score them (French analyzer),
    save them to CSV and print the mean polarities in descending order.

    FIXES: the original used Python 2 ``print`` statements (a SyntaxError in
    this Python 3 file) and wrote ``str`` data to a file opened in ``'wb'``
    (a TypeError on Python 3).
    """
    # Step 2 - Prepare query features
    candidates_names = ['Clinton', 'Trump']
    # Hashtag related to the debate
    name_of_debate = str(topic)
    # Query window around the debate date
    since_date = "2016-10-01"
    until_date = "2016-11-06"
    # Step 3 - Retrieve tweets and save them
    all_polarities = dict()
    for candidate in candidates_names:
        this_candidate_polarities = []
        this_candidate_tweets = api.search(q=[name_of_debate, candidate],
                                           count=100,
                                           since=since_date, until=until_date)
        # Text mode ('w'): we write str, not bytes.
        with open('%s_tweets.csv' % candidate, 'w') as this_candidate_file:
            this_candidate_file.write('tweet,sentiment_label\n')
            for tweet in this_candidate_tweets:
                analysis = TextBlob(tweet.text, pos_tagger=PatternTagger(),
                                    analyzer=PatternAnalyzer())
                this_candidate_polarities.append(analysis.sentiment[0])
                this_candidate_file.write(
                    '%s,%s\n' % (tweet.text, get_label(analysis)))
        # Save the mean for the final results
        all_polarities[candidate] = np.mean(this_candidate_polarities)
    # Bonus step - print the result
    sorted_analysis = sorted(all_polarities.items(),
                             key=operator.itemgetter(1), reverse=True)
    print('Mean Sentiment Polarity in descending order :')
    for candidate, polarity in sorted_analysis:
        print('%s : %0.3f' % (candidate, polarity))
def textblob_sentiment_fr(tweets):
    """Get Textblob sentiment polarity scores (French analyzer) for each tweet."""
    return [
        TextBlob(tweet, pos_tagger=PatternTagger(),
                 analyzer=PatternAnalyzer()).sentiment[0]
        for tweet in tweets
    ]
def Sentiment_article(article):
    """Mean French sentence polarity of an article: ~0 neutral, 1 positive, -1 negative.

    Tokenizes the article into French sentences and averages each sentence's
    textblob-fr polarity.
    """
    tok_art = sent_tokenize(article, language='french')
    # Empty/whitespace-only articles produce no sentences: return neutral
    # instead of raising ZeroDivisionError (original bug).
    if not tok_art:
        return 0
    sentiment = 0
    for sentence in tok_art:
        sentiment += TextBlob(sentence, pos_tagger=PatternTagger(),
                              analyzer=PatternAnalyzer()).sentiment[0]
    return sentiment / len(tok_art)
def querytextanswers(self) -> List[TextAnswer]:
    """Score the free-text answers of each non-integer question.

    Filters Answer rows to the forms/students/questions held on self, then for
    each text question runs French sentiment analysis on every answer and
    collects its polarity and subjectivity lists into a TextAnswer.

    Returns:
        One TextAnswer per text question.
    """
    res = list()
    # Scope the query to the ids this instance is working with.
    form_ids = [f.id for f in self.forms_list]
    student_ids = [s.id for s in self.students_list]
    # Text questions only (isint == DBFALSE filters out numeric answers).
    questions = [
        q for q in self.questions_list if q.isint == Const.DBFALSE
    ]
    question_ids = [q.id for q in questions]
    answers = db.session.query(Answer).filter(
        Answer.form_id.in_(form_ids),
        Answer.student_id.in_(student_ids),
        Answer.question_id.in_(question_ids)).order_by(Answer.question_id)
    # see example https://github.com/sloria/textblob-fr
    for question in questions:
        polvalues = list()
        subvalues = list()
        # Re-filter the base query per question (one extra DB round-trip each).
        texts = [
            answer.text
            for answer in answers.filter(Answer.question_id == question.id)
        ]
        for text in texts:
            # French sentiment: tuple of (polarity, subjectivity).
            blob = TextBlob(text, pos_tagger=PatternTagger(),
                            analyzer=PatternAnalyzer())
            qsentiment = blob.sentiment
            polvalues.append(qsentiment[0])
            subvalues.append(qsentiment[1])
        res.append(
            TextAnswer(questiontext=question.text, polarities=polvalues,
                       subjectivities=subvalues))
    return res
def analize_sentiment_fr(tweet):
    """Map a cleaned French tweet's polarity to 1 (positive), 0 (neutral) or -1."""
    blob = TextBlob(clean_tweet(tweet), pos_tagger=PatternTagger(),
                    analyzer=PatternAnalyzer())
    score = float(blob.sentiment[0])
    if score > 0:
        return 1
    if score == 0:
        return 0
    return -1
def sentiment_analyser(text):
    """Return the (polarity, subjectivity) sentiment of a French text."""
    # French POS tagger + pattern analyzer; hand back the raw sentiment tuple.
    return TextBlob(text, pos_tagger=PatternTagger(),
                    analyzer=PatternAnalyzer()).sentiment
def get_usefull_words(text):
    """Print the spell-corrected blob, its noun phrases, and its NN/JJ/VB tags."""
    corrected = TextBlob(text, pos_tagger=PatternTagger(),
                         analyzer=PatternAnalyzer()).correct()
    print(corrected)
    print(corrected.noun_phrases)
    kept = ('NN', 'JJ', 'VB')  # nouns, adjectives, verbs
    for word, pos in corrected.tags:
        if pos in kept:
            print(word, pos)
def getSentimentForReview(review):
    """Return the French (polarity, subjectivity) sentiment tuple of *review*."""
    return TextBlob(review, pos_tagger=PatternTagger(),
                    analyzer=PatternAnalyzer()).sentiment
def run(self):
    """Consume movie messages from Kafka, attach a sentiment-based rating, and
    re-publish them to the 'my-ratings' topic.

    Two modes (selected by the module-level ``allocine`` flag):
    - allocine: average the textblob-fr polarity over spectators_reviews.
    - otherwise: delegate scoring of the 'review' field to SentimentalAnalysis.
    """
    Myanalyzer = SentimentalAnalysis
    print("Analyzer begin")
    consumer = KafkaConsumer(bootstrap_servers='localhost:9092',
                             auto_offset_reset='earliest',
                             consumer_timeout_ms=10000)
    consumer.subscribe(['my-topic'])
    print("Subscription analyzer: OK")
    moviesList = []
    for message in consumer:
        if allocine:
            jsoned = json.loads(message.value)
            from textblob import TextBlob
            from textblob_fr import PatternTagger, PatternAnalyzer
            print(jsoned)
            if (jsoned['spectators_reviews']):
                # NOTE(review): nbNote starts at 1 although no review has been
                # counted yet, so the average divides by len(reviews)+1 —
                # confirm whether this bias is intentional.
                note = 0
                nbNote = 1
                for elem in jsoned['spectators_reviews']:
                    blob = TextBlob(elem, pos_tagger=PatternTagger(),
                                    analyzer=PatternAnalyzer())
                    note += blob.sentiment[0]
                    nbNote += 1
                note /= nbNote
                jsoned['ownRating'] = note
                moviesList.append(jsoned)
        else:
            jsoned = json.loads(message.value)
            note = Myanalyzer.analysis(jsoned['review'])
            # Falsy score (None or 0) defaults the rating to 0.
            if (note):
                jsoned['ownRating'] = note
            else:
                jsoned['ownRating'] = 0
            moviesList.append(jsoned)
    producer = KafkaProducer(bootstrap_servers='localhost:9092')
    for data in moviesList:
        if (debug):
            print("dataAnalyzer to send ", data)
        # Drop the reviews to keep the outgoing payload small.
        data[
            'spectators_reviews'] = ""
        # NOTE(review): json.dumps returns str; kafka-python producers expect
        # bytes unless a value_serializer is configured — verify.
        producer.send("my-ratings", json.dumps(data))
        print("Sent")
    producer.flush()
    producer.close()
    print("DataAnalyse produced")
def is_neutral(self, text):
    """Decide whether *text* is neutral.

    A zero textblob-fr polarity needs only ONE of the two classifiers
    (Bayes, SVM) to vote neutral; a non-zero polarity needs BOTH.
    """
    polarity = TextBlob(text, pos_tagger=PatternTagger(),
                        analyzer=PatternAnalyzer()).sentiment[0]
    if polarity == 0:
        # or-short-circuit: SVM is only consulted when Bayes disagrees.
        return (self.bayes_clf.predict([text])[0] == 0) or \
               (self.svm_clf.predict([text])[0] == 0)
    # and-short-circuit: SVM is only consulted when Bayes agrees.
    return (self.bayes_clf.predict([text])[0] == 0) and \
           (self.svm_clf.predict([text])[0] == 0)
def textblob_sentiment(tweets):
    """Get Textblob sentiment scores with both the English and French analyzers.

    Returns:
        (english_scores, french_scores) — two parallel lists of polarities.
    """
    english_scores = []
    french_scores = []
    for tweet in tweets:
        english_scores.append(TextBlob(tweet).sentiment.polarity)
        fr_blob = TextBlob(tweet, pos_tagger=PatternTagger(),
                           analyzer=PatternAnalyzer())
        french_scores.append(fr_blob.sentiment[0])
    return english_scores, french_scores
def __init__(self): self.french_stop_words = get_stop_words('french') # TODO Acev SpaCy, quelle différence entre "md" et "sm" ?? self.nlp = spacy.load( 'fr_core_news_md' ) # Utilisé par SapCy pour la Lemmatisation et Stemmatisation self.blob = Blobber( pos_tagger=PatternTagger(), analyzer=PatternAnalyzer()) # Analyse de sentiments
def labellize_sentence(self, sentence, model):
    """Label *sentence* with the CRF *model* and print each (token, label) pair."""
    tagged = TextBlob(sentence, pos_tagger=PatternTagger(),
                      analyzer=PatternAnalyzer()).tags
    encoded = self.sent2features(tagged)
    print()
    predicted = model.predict([encoded])
    for token, label in zip(tagged, predicted[0]):
        print(token, label)
def sentiment_analysis(name, dictionary):
    """Append, under key *name*, the French polarity of each line of ``<name>.txt``.

    Args:
        name: base name of the input text file (without the .txt extension).
        dictionary: dict to fill; dictionary[name] becomes the list of
            per-line polarity scores.

    Returns:
        The (mutated) dictionary. The exact scores depend on the file contents.
    """
    dictionary[name] = []
    # Context manager closes the file; the original leaked the handle.
    with open(name + '.txt') as handle:
        for line in handle.readlines():
            blob = TextBlob(line, pos_tagger=PatternTagger(),
                            analyzer=PatternAnalyzer())
            dictionary[name].append(blob.sentiment[0])
    return dictionary
def get_tweet_sentiment(self, row):
    """Classify a tweet's French polarity as 'positive', 'neutral' or 'negative'.

    FIX: the original recovered the polarity by string-parsing
    ``str(analysis.sentiment)`` with find('(')/find(',') — fragile formatting
    surgery. Read the polarity directly from the sentiment tuple instead.
    """
    analysis = TextBlob(str(row), pos_tagger=PatternTagger(),
                        analyzer=PatternAnalyzer())
    polarity = float(analysis.sentiment[0])
    if polarity > 0:
        return 'positive'
    elif polarity == 0:
        return 'neutral'
    else:
        return 'negative'
def sentiments_cava(msg_user):
    """Answer a 'how are you' message with a reply matching its sentiment."""
    happy_bot = ["Super!", "Content que tout aille bien!"]
    sad_bot = [
        "Désolé d'entendre ça",
        "Je voudrais te consoler mais je suis trop basique pour ça."
    ]
    ok_bot = ["Ok...", "Bon..."]
    polarity = TextBlob(msg_user, pos_tagger=PatternTagger(),
                        analyzer=PatternAnalyzer()).sentiment[0]
    # Pick the reply pool by polarity band, then answer at random.
    if polarity >= 0.2:
        pool = happy_bot
    elif polarity <= -0.2:
        pool = sad_bot
    else:
        pool = ok_bot
    return random.choice(pool)
def detect_sentiment(text, lang):
    """Return 'Positive', 'Negative' or 'Neutral or Undefined' for *text*.

    Uses the French analyzer when lang == 'fr', the default one otherwise;
    the neutral band is (-0.3, 0.3).
    """
    if lang == 'fr':
        polarity = TextBlob(text, pos_tagger=PatternTagger(),
                            analyzer=PatternAnalyzer()).sentiment[0]
    else:
        polarity = TextBlob(text).sentiment.polarity
    if polarity > 0.3:
        return 'Positive'
    if polarity < -0.3:
        return 'Negative'
    return 'Neutral or Undefined'
def convert_training_set(self, training_set_folder='/home/emeric/1_Github/RecipeAnalyzor/models/training_set_crf'):
    """Convert brat-style .txt/.ann pairs into scikit-learn crfsuite tuples.

    Generalization: the training-set folder, previously hard-coded inside the
    body, is now a parameter whose default preserves the original behaviour.

    Returns:
        A list with one entry per .ann file, each a list of
        (token, pos_tag, label) triples; tokens absent from the annotations
        are labelled '0'.
    """
    crfsuite_annotation = []
    for filename in os.listdir(training_set_folder):
        if not filename.endswith('.ann'):
            continue
        root_name = re.findall('([0-9]{1,10}).ann', filename)[0]
        # Paired raw text and annotation files share the numeric root name.
        with open('{}/{}.txt'.format(training_set_folder, root_name),
                  'r') as handler:
            text = handler.read()
        with open('{}/{}.ann'.format(training_set_folder, root_name),
                  'r') as handler:
            annotations = [
                ann.strip('\n').split('\t') for ann in handler.readlines()
            ]
        blob = TextBlob(text, pos_tagger=PatternTagger(),
                        analyzer=PatternAnalyzer())
        # Surface forms annotated with a brat text-bound ('T*') row.
        tagged_tokens = [x[2] for x in annotations if x[0].startswith('T')]
        file_annotation = []
        for token in blob.tags:
            if token[0] in tagged_tokens:
                # First label found among the matching annotation rows.
                tag = list(
                    set([
                        x[1].split(' ')[0] for x in annotations
                        if x[0].startswith('T') and x[2] == token[0]
                    ]))[0]
            else:
                tag = '0'
            file_annotation.append((token[0], token[1], tag))
        crfsuite_annotation.append(file_annotation)
    return crfsuite_annotation
def polarity(list_phrase, data):
    """Label each phrase 'positif'/'neutre'/'negatif' ('vide' when empty),
    pairing the label with the date of the matching *data* entry."""
    polarite = []
    for idx, phrase in enumerate(list_phrase):
        if phrase == '':
            polarite.append('vide')
            continue
        score = TextBlob(phrase, pos_tagger=PatternTagger(),
                         analyzer=PatternAnalyzer()).sentiment[0]
        # (-0.15, 0.15) is the neutral band.
        if -0.15 < score < 0.15:
            label = 'neutre'
        elif score > 0.15:
            label = 'positif'
        else:
            label = 'negatif'
        polarite.append([label, data[idx]['date']])
    return polarite
def blob_sentiment(text, Polarity=True, Subjectivity=True):
    """French sentiment via textblob-fr's ~5300-word polarity/subjectivity lexicon.

    Polarity is a float in [-1, 1] (-1 negative opinion, 1 positive);
    subjectivity is a float in [0, 1] (0 objective, 1 subjective).

    Returns the (polarity, subjectivity) pair, a single score, or None,
    depending on which flags are True.
    """
    scores = TextBlob(text, pos_tagger=PatternTagger(),
                      analyzer=PatternAnalyzer()).sentiment
    if Polarity == True and Subjectivity == True:
        return scores[0], scores[1]
    if Polarity == True and Subjectivity == False:
        return scores[0]
    if Polarity == False and Subjectivity == True:
        return scores[1]
    return None
def get_sentiment_from_sentence(sentence):
    """Score ``sentence.line`` with the French Blobber.

    Returns a dict with keys 'pola' (raw polarity), 'perc' (absolute polarity
    as a percentage string) and 'sent' (label).
    """
    analyzer = Blobber(pos_tagger=PatternTagger(), analyzer=PatternAnalyzer())
    pola, subj = analyzer(sentence.line).sentiment
    if pola > 0:
        label = "positive"
    elif pola < 0:
        label = "negative"
    else:
        label = "neutral"
    return {'pola': pola, 'perc': f"{100*abs(pola):.0f}", 'sent': label}
def data(file):
    """Print the French sentiment of every non-null entry in the first column,
    iterated twice (once as 'title', once as 'comment')."""
    # NOTE(review): both lists read column 0 — the comment list probably meant
    # iloc[:, 1]; confirm against the spreadsheet layout before changing.
    title = file.iloc[:, 0].dropna().tolist()
    comment = file.iloc[:, 0].dropna().tolist()
    pos = []
    sub = []
    for group in (title, comment):
        for phrase in group:
            scores = TextBlob(phrase, pos_tagger=PatternTagger(),
                              analyzer=PatternAnalyzer()).sentiment
            pos.append(scores[0])
            sub.append(scores[1])
            print(phrase)
            print(scores)
def polarizer_text_blob_french(df_Verbatim):
    """
    Method which generates a list of sentiment categories (neutral, positive,
    negative) from a dataframe of textual comments in French.
    Input : a dataframe with only one column of strings
    Output : a list of strings
    """
    categories = []
    for article in df_Verbatim:
        score = round(
            TextBlob(article, pos_tagger=PatternTagger(),
                     analyzer=PatternAnalyzer()).sentiment[0], 3)
        if score > 0:
            categories.append('positive')
        elif score < 0:
            categories.append('negative')
        else:
            categories.append('neutral')
    return categories
class TestPatternTagger(unittest.TestCase):
    """Checks that PatternTagger keeps its output tokens aligned with the input words."""

    def setUp(self):
        self.tagger = PatternTagger()
        self.text = u"Bonjour tout le monde"

    def test_tag(self):
        """Tagging directly: each tagged token matches the corresponding word."""
        tags = self.tagger.tag(self.text)
        logging.debug("tags: {0}".format(tags))
        words = self.text.split()
        for i, word_tag in enumerate(tags):
            assert_equal(word_tag[0], words[i])

    def test_tag_blob(self):
        """Tagging through a TextBlob gives the same word alignment."""
        blob = TextBlob(self.text, pos_tagger=self.tagger)
        tags = blob.tags
        logging.debug("tags: {0}".format(tags))
        words = self.text.split()
        for i, word_tag in enumerate(tags):
            assert_equal(word_tag[0], words[i])
def setUp(self):
    # Fresh tagger and a simple French sample sentence for each test.
    self.tagger = PatternTagger()
    self.text = u"Bonjour tout le monde"