import operator
import os

import nltk
import numpy as np
import pandas as pd
from empath import Empath


def executeEmpathOnISEAR(ISEAR, DATADIR):
    try:
        corpus = pd.read_csv(ISEAR, sep=',', header=None)
        if not os.path.isfile(DATADIR + "/labels_empath_on_ISEAR.txt"):
            lexicon = Empath()  # instance of the Empath analyser
            emotions_list = ['fear', 'joy', 'anger', 'sadness', 'disgust']
            model = "reddit"
            res = {}
            best_em = []  # will contain the Empath analysis results
            emotions_results = []
            for i in range(len(emotions_list)):  # creates a category for each emotion
                lexicon.create_category(emotions_list[i], [emotions_list[i]], model=model)
            for sentence in corpus[1]:
                for k in range(len(emotions_list)):  # tokenizes and analyzes the sentences
                    tokens = nltk.word_tokenize(sentence)
                    emotions_results = lexicon.analyze(tokens, normalize=True,
                                                       categories=[emotions_list[k]])
                    res = {**res, **emotions_results}  # merge all results into one dictionary
                emotions_results = []
                max_likely_emotions_empath = max(res.items(), key=operator.itemgetter(1))[0]
                if res[max_likely_emotions_empath] != 0.0:
                    best_em.append(max_likely_emotions_empath)
                else:
                    best_em.append('no_idea')
            best_em = np.asarray(best_em)
            np.savetxt(DATADIR + "/labels_empath_on_ISEAR.txt", best_em, fmt="%s")  # saves the Empath detection
        # ---------------------------------- if labels already exist: --------------------------------
        ISEAR_labels = corpus[0]
        empath_labels = pd.read_csv(DATADIR + '/labels_empath_on_ISEAR.txt', sep=',', header=None)
        detected_labels = [ISEAR_labels[i] for i in range(len(ISEAR_labels))
                           if empath_labels[0][i] != 'no_idea']
        matches = [ISEAR_labels[i] for i in range(len(ISEAR_labels))
                   if empath_labels[0][i] == ISEAR_labels[i]]
        detected_percentage = len(detected_labels) / len(ISEAR_labels)
        overall_accuracy = len(matches) / len(ISEAR_labels)
        detected_accuracy = len(matches) / len(detected_labels)
        print('detected_percentage:', detected_percentage)
        print('detected_accuracy:', detected_accuracy)
        print('overall_accuracy:', overall_accuracy)
        return 0
    except Exception as e:
        print(str(e))
        return 51
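# Hypothetical driver for the function above (not part of the original snippet):
# the CSV path and output directory are placeholders, and the ISEAR file is
# assumed to hold the gold emotion label in column 0 and the sentence in column 1.
if __name__ == "__main__":
    exit_code = executeEmpathOnISEAR("isear.csv", "data")  # prints coverage/accuracy, returns 0 or 51
    print("exit code:", exit_code)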
# Tail of a ranking helper; the enclosing `def` is not included in this excerpt.
    args = np.argsort(cosines)[::-1]
    return [(index_to_botname[x], cosines[x]) for x in args]


def edit_distance(query_str, msg_str):
    return Levenshtein.distance(query_str.lower(), msg_str.lower())


def similar_names(query, msgs):
    li = [(edit_distance(query, msg), msg) for msg in msgs]
    li.sort(key=lambda x: x[0])
    return li


lexicon = Empath()
lexicon.create_category("funny", ["funny", "lol", "hilarious", "haha", "joke"])
#lexicon.create_category("silly", ["silly", "ridiculous", "childish"])
lexicon.create_category("stupid", ["stupid", "dumb", "pointless", "wrong"])
#lexicon.create_category("good", ["good", "great", "perfect", "wonderful", "fantastic"])
lexicon.create_category(
    "bad", ["bad", "wrong", "waste", "inaccurate", "stupid", "disagree", "sad"])
lexicon.create_category("useful", ["good", "function", "effective", "interesting"])
lexicon.create_category("appreciated", ["appreciate", "thanks", "good", "useful"])
#lexicon.create_category("interesting", ["cool", "interesting", "fascinating"])
lexicon.create_category(
    "factual", ["fact", "check", "statistics", "information", "informative"])
lexicon.create_category("shocking", ["shocked", "wtf", "shit", "jesus", "christ", "yikes"])
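# Illustrative use of the feedback categories defined above (the message text is
# made up): analyze() returns one score per requested category, normalized by
# token count when normalize=True.
feedback = "thanks, this was really useful and informative"
scores = lexicon.analyze(feedback, categories=["useful", "appreciated", "factual"], normalize=True)
print(scores)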
def textProcessing(JSON, DATA, USERNAME, APIKEY):
    global JSON_file
    global data_folder
    global sender_name
    JSON_file = JSON
    data_folder = DATA
    sender_name = USERNAME
    if not os.path.isfile(JSON_file):
        return 201
    if not os.path.isdir(data_folder):
        return 202
    if data_folder[-1:] == "/":
        return 203
    analyser = SentimentIntensityAnalyzer()  # instance of the VADER sentiment analyzer
    lexicon = Empath()  # instance of the Empath analyser
    emotions_list = ['fear', 'joy', 'anger', 'sadness', 'disgust']
    model = "reddit"
    with open(JSON_file, encoding='utf-8-sig') as json_file:
        try:
            try:
                json_data = json.load(json_file)
            except Exception as e:
                return 23
            chat = json_data["chats"]["list"]
            sentences = []
            text_emotions = []
            for i in range(len(chat[0]["messages"])):  # appends all the sender's messages to a list
                if (chat[0]["messages"][i]["from"].replace(" ", "") == sender_name
                        and len(chat[0]["messages"][i]["text"]) > 6):
                    sentences.append(chat[0]["messages"][i]["text"])
            for i in range(len(emotions_list)):  # creates a category for each emotion
                lexicon.create_category(emotions_list[i], [emotions_list[i]], model=model)
            best_em = []  # will contain the Empath analysis results
            emotions_results = []
            res = {}
            neg = []
            neutral = []
            pos = []
            for j in range(len(sentences)):
                if sentences[j] != '':
                    print('')
                    print(sentences[j])
                    output = detect_emotion(sentences[j], APIKEY)
                    if output == 25:
                        return output
                    text_emotions.append(output)
                    output = sentiment_analyzer_scores(sentences[j], analyser)
                    if output == 26:
                        return output
                    neg.append(output["neg"])
                    neutral.append(output["neu"])
                    pos.append(output["pos"])
                    if j != len(sentences) - 1:
                        neg.append(',')
                        neutral.append(',')
                        pos.append(',')
                    error = textBlobSentimentAnalyze(sentences[j])
                    if error != 0:
                        return error
                    for k in range(len(emotions_list)):  # tokenizes and analyzes the sentences
                        tokens = nltk.word_tokenize(sentences[j])
                        emotions_results = lexicon.analyze(tokens, normalize=True,
                                                           categories=[emotions_list[k]])
                        res = {**res, **emotions_results}  # merge all results into one dictionary
                    print(res)
                    emotions_results = []
                    max_likely_emotions_empath = max(res.items(), key=operator.itemgetter(1))[0]
                    if max(res.items(), key=operator.itemgetter(1))[1] != 0.0:
                        print('max empath:', max_likely_emotions_empath)
                        print('')
            if not os.path.isfile(data_folder + "/sentences.txt"):
                print("Creating and writing into: 'sentences.txt' ...")
                sentences = np.asarray(sentences)
                np.savetxt(data_folder + "/sentences.txt", sentences, fmt="%s")  # saves sentences
            else:
                return 280
            if not os.path.isfile(data_folder + "/text_emotions.txt"):
                print("Creating and writing into: 'text_emotions.txt' ...")
                text_emotions = np.asarray(text_emotions)
                np.savetxt(data_folder + "/text_emotions.txt", text_emotions, fmt="%s")  # saves emotions
            else:
                return 281
            if not os.path.isfile(data_folder + "/sentiment_types.txt"):
                print("Creating and writing into: 'sentiment_types.txt' ...")
                sentiment_types = []
                sentiment_types.append(neg)
                sentiment_types.append(neutral)
                sentiment_types.append(pos)
                sentiment_types = np.asarray(sentiment_types)
                np.savetxt(data_folder + "/sentiment_types.txt", sentiment_types, fmt="%s")  # saves sentiment types
            else:
                return 282
            return 0
        except Exception as e:
            print(e)
            return 24
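# Hypothetical call to the routine above (placeholder path, folder, sender name
# and API key; the surrounding project supplies the real values). A return of 0
# means success, the other numeric codes signal the specific failure hit above.
rc = textProcessing("result.json", "data", "Alice", "API_KEY_PLACEHOLDER")
print("textProcessing returned:", rc)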
def command(self, name, seed_terms):
    from empath import Empath
    lexicon = Empath()
    lexicon.create_category(name, seed_terms, model="reddit")
    out = lexicon.cats[name]
    return name, out
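# Minimal standalone sketch of the same idea (category name and seed terms are
# illustrative): create_category() expands the seeds against Empath's "reddit"
# model, and the expanded word list is then available under lexicon.cats[name].
from empath import Empath

lexicon = Empath()
lexicon.create_category("gratitude", ["thanks", "grateful", "appreciate"], model="reddit")
print(lexicon.cats["gratitude"][:10])  # a few of the expanded terms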
def count_unconnect(u):
    # hoping this is a fairly diverse group
    lexicon = Empath()
    # print(len(u))
    lexicon.create_category("support", support, model="nytimes")
    lexicon.create_category("conflict", conflict, model="nytimes")
    lexicon.create_category("conclusion", conclusion, model="nytimes")
    lexicon.create_category("complementary", complementary, model="nytimes")
    lexicon.create_category("causal_argument", causal_argument, model="nytimes")
    lexicon.create_category("verbs_hedging", verbs_hedging, model="nytimes")
    # ["because", "only", "before", "so", "if", "though", "then", "until", "once", "even", "since", "although", "so", "while", "having", "because", "already", "thus", "time", "unless", "now", "actually", "eventually"]
    # ["though", "although", "except", "yet", "but", "even", "because", "only", "Though", "Although", "Yet", "either", "nevertheless", "whereas", "though", "fact", "however", "unlike", "Furthermore", "because", "nonetheless", "And", "However", "none", "either", "still", "Even", "despite", "if", "so", "Yet", "meaning", "indeed", "consequently"]
    # []
    # ["while", "whereas", "though", "only", "yet", "While", "thus", "even", "Thus", "Instead", "although", "instead", "Though", "Moreover", "actually", "nevertheless", "sometimes", "still", "rather"]
    # ["means", "therefore", "means", "merely", "mechanism", "democratic_process", "Therefore", "simply", "free_market", "consequence", "because"]
    # cat_all = lexicon.analyze(u, categories=["support", "conflict", "conclusion", "complementary", "causal_argument"], normalize=True)
    cat_all = lexicon.analyze(u, categories=['verbs_hedging'], normalize=True)
    # cat_all = {}
    # for arg in u:
    #     cat = lexicon.analyze(arg)
    #     if cat["children"] != 0:
    #         print(arg, cat["children"])
    return cat_all
def count_connect(u):
    cat_all = {}
    lexicon = Empath()
    lexicon.create_category("support", support, model="nytimes")
    lexicon.create_category("conflict", conflict, model="nytimes")
    lexicon.create_category("conclusion", conclusion, model="nytimes")
    lexicon.create_category("complementary", complementary, model="nytimes")
    lexicon.create_category("causal_argument", causal_argument, model="nytimes")
    lexicon.create_category("verbs_hedging", verbs_hedging, model="nytimes")
    heads = []
    not_heads = []
    for (arg1, arg2) in u:
        heads.append(arg1)
        not_heads.append(arg2)
    norep_heads = list(set(heads))
    norep_not_heads = list(set(not_heads))
    args_conn = list(set(heads) | set(not_heads))
    lexicon = Empath()
    # cat_heads = lexicon.analyze(norep_heads, categories=["support", "conflict", "conclusion", "complementary", "causal_argument"], normalize=True)
    cat_heads = lexicon.analyze(norep_heads, categories=['verbs_hedging'], normalize=True)
    # cat_heads = {}
    # for h in norep_heads:
    #     cat_heads = lexicon.analyze(h, normalize=True)
    #     if cat_heads["fun"] != 0:
    #         print(h, cat_heads["fun"])
    # cat_not_heads = lexicon.analyze(norep_not_heads, categories=["support", "conflict", "conclusion", "complementary", "causal_argument"], normalize=True)
    cat_not_heads = lexicon.analyze(norep_not_heads, categories=['verbs_hedging'], normalize=True)
    # cat_all = lexicon.analyze(args_conn, categories=["support", "conflict", "conclusion", "complementary", "causal_argument"], normalize=True)
    cat_all = lexicon.analyze(args_conn, categories=['verbs_hedging'], normalize=True)
    return cat_heads, cat_not_heads, cat_all
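# Hypothetical call to the two helpers above (assumes the seed-word lists such as
# `support` and `verbs_hedging` are defined elsewhere in the project, as both
# functions expect). The argument texts below are made up.
pairs = [("It probably fails because the sample is small.",
          "The study seems convincing overall.")]
unconnected = ["This might suggest a different conclusion."]
heads_scores, not_heads_scores, all_scores = count_connect(pairs)
hedging_scores = count_unconnect(unconnected)
print(heads_scores, hedging_scores)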
from empath import Empath
import sys
import json
from cnrelated import get_related_terms
from os import path

emp = Empath()

# If -d is specified, delete the categories to start fresh
if len(sys.argv) > 2:
    if sys.argv[2] == "-d":
        with open(sys.argv[1], "r") as file:
            obj = json.load(file)
            for word in obj["topics"]:
                print(word)
                emp.delete_category(word)
# Otherwise, load up the topics and create categories from them
elif len(sys.argv) == 2:
    with open(sys.argv[1], "r") as file:
        obj = json.load(file)
        for word in obj["topics"]:
            # Check to see if a category already exists to ensure we aren't overwriting them every time
            if not path.exists("venv/Lib/site-packages/empath/data/user/" + word + ".empath"):
                # Get the related words from ConceptNet to use as a seed for creating the category
                seeds = get_related_terms(word)
                print(word, seeds)
                # Create the category for word
                emp.create_category(word, seeds)
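# Example invocation of the script above (the script and topics file names are
# placeholders): the JSON file is expected to look like {"topics": ["climate", "economy"]}.
# Passing -d deletes the previously generated categories instead of creating them.
#
#   python build_categories.py topics.json
#   python build_categories.py topics.json -d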