def get_ent_norm_table():
    conn, cursor = get_conn_info()
    df_ent_table_norm = sql.read_sql(
        """SELECT backend_entitiesnormalized.article,
                  backend_article.addedon,
                  backend_article.url,
                  backend_article.source,
                  backend_entitiesnormalized.score,
                  backend_article.text,
                  backend_entitiesnormalized.name
           FROM backend_entitiesnormalized
           INNER JOIN backend_article
               ON backend_article.uniqueid = backend_entitiesnormalized.article;""",
        conn)
    #Keep a python datetime copy, then reformat addedon as dd_mm_yyyy for display
    df_ent_table_norm["addedonpy"] = [
        d.to_pydatetime() for d in df_ent_table_norm["addedon"]
    ]
    df_ent_table_norm["addedon"] = [
        d.strftime('%d_%m_%Y') if not pd.isnull(d) else ''
        for d in df_ent_table_norm["addedonpy"]
    ]
    #Article length in characters
    df_ent_table_norm["length"] = [
        len(text) for text in df_ent_table_norm["text"]
    ]
    cursor.close()
    conn.close()
    return df_ent_table_norm
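#Minimal usage sketch (illustrative only, not part of the pipeline; the date
#string is a made-up example). Pull the normalized entity table and inspect
#mentions from a single day via the dd_mm_yyyy string built above.
# df_ents = get_ent_norm_table()
# print(df_ents[df_ents['addedon'] == '01_06_2018'].head())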
def get_flag_data():
    conn, cursor = get_conn_info()
    df_flag_table = sql.read_sql(
        """SELECT backend_article.uniqueid,
                  backend_article.analyzed,
                  backend_article.nltk_sentiment,
                  backend_article.aylien_sentiment_adv
           FROM backend_article;""",
        conn)
    cursor.close()
    conn.close()
    return df_flag_table
def get_mod_data():
    conn, cursor = get_conn_info()
    df_mod_table = sql.read_sql(
        """SELECT backend_randomarticleuserrating.uniqueid_id AS ArticleID,
                  backend_randomarticleuserrating.score AS UserScore,
                  backend_sentiment.watson_score,
                  backend_sentiment.aylien_polarity,
                  backend_sentiment.aylien_confidence,
                  backend_sentiment.aylien_paragraph1_sentiment,
                  backend_sentiment.aylien_paragraph1_confidence,
                  backend_sentiment.aylien_paragraph2_sentiment,
                  backend_sentiment.aylien_paragraph2_confidence,
                  backend_sentiment.aylien_title_sentiment,
                  backend_sentiment.aylien_title_confidence,
                  backend_sentiment.nltk_paragraph1_sentiment,
                  backend_sentiment.nltk_paragraph2_sentiment,
                  backend_sentiment.nltk_title_sentiment,
                  backend_sentiment.nltk_combined_sentiment,
                  backend_sentiment.model_score,
                  backend_emotion.anger,
                  backend_emotion.disgust,
                  backend_emotion.fear,
                  backend_emotion.joy,
                  backend_emotion.sadness
           FROM backend_randomarticleuserrating, backend_sentiment, backend_emotion
           WHERE backend_randomarticleuserrating.uniqueid_id = backend_sentiment.article
             AND backend_randomarticleuserrating.uniqueid_id = backend_emotion.article;""",
        conn)
    cursor.close()
    conn.close()
    #Two versions of the table are returned: the full one, and a simplified one
    #where the articles are grouped by the mean of the user rating
    df_mod_table_simple = df_mod_table[[
        'articleid', 'watson_score', 'nltk_combined_sentiment',
        'nltk_title_sentiment', 'model_score', 'userscore'
    ]]
    df_mod_table_simple_mean = pd.DataFrame(
        df_mod_table_simple.groupby([
            'articleid', 'watson_score', 'nltk_combined_sentiment',
            'nltk_title_sentiment', 'model_score'
        ], as_index=False)['userscore'].mean())
    return df_mod_table, df_mod_table_simple_mean
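#Minimal usage sketch (illustrative only, not part of the pipeline). Compare
#the stored model score against the averaged user ratings returned by
#get_mod_data().
# df_mod_full, df_mod_mean = get_mod_data()
# print(df_mod_mean[['model_score', 'userscore']].corr())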
def get_url_table():
    conn, cursor = get_conn_info()
    df_url_table = sql.read_sql(
        """SELECT backend_article.uniqueid,
                  backend_article.url,
                  backend_article.addedon,
                  backend_article.sumanalyzed
           FROM backend_article;""",
        conn)
    #Keep a python datetime copy, then reformat addedon as dd_mm_yyyy
    df_url_table["addedonpy"] = [
        d.to_pydatetime() for d in df_url_table["addedon"]
    ]
    df_url_table["addedon"] = [
        d.strftime('%d_%m_%Y') if not pd.isnull(d) else ''
        for d in df_url_table["addedonpy"]
    ]
    cursor.close()
    conn.close()
    return df_url_table
def get_top_ents():
    conn, cursor = get_conn_info()
    df_top_ents_table = sql.read_sql(
        """SELECT backend_article.addedon,
                  backend_entitiesnormalized.name,
                  backend_entitiesnormalized.score,
                  backend_entitiesnormalized.article,
                  backend_sentiment.model_score
           FROM backend_sentiment, backend_article, backend_entitiesnormalized
           WHERE backend_article.uniqueid = backend_entitiesnormalized.article
             AND backend_article.uniqueid = backend_sentiment.article;""",
        conn)
    #Keep both a full datetime and a date-only column for grouping by day
    df_top_ents_table["addedonpy"] = [
        d.to_pydatetime() for d in df_top_ents_table["addedon"]
    ]
    df_top_ents_table["addedondt"] = [
        d.to_pydatetime().date() for d in df_top_ents_table["addedon"]
    ]
    cursor.close()
    conn.close()
    return df_top_ents_table
def get_art_table():
    conn, cursor = get_conn_info()
    df_art_table = sql.read_sql(
        """SELECT backend_article.uniqueid,
                  backend_article.addedon,
                  backend_article.url,
                  backend_article.title,
                  backend_article.source,
                  backend_article.modelprocessed,
                  backend_sentiment.watson_score,
                  backend_sentiment.nltk_combined_sentiment,
                  backend_sentiment.nltk_title_sentiment,
                  backend_article.entitynormalized,
                  backend_article.nltk_sentiment,
                  backend_article.aylien_sentiment_adv,
                  backend_article.text
           FROM backend_article
           INNER JOIN backend_sentiment
               ON backend_article.uniqueid = backend_sentiment.article;""",
        conn)
    #Full datetime, date-only, and dd_mm_yyyy display versions of addedon
    df_art_table["addedonpy"] = [
        d.to_pydatetime() for d in df_art_table["addedon"]
    ]
    df_art_table["addedondt"] = [
        d.to_pydatetime().date() for d in df_art_table["addedon"]
    ]
    df_art_table["addedon"] = [
        d.strftime('%d_%m_%Y') if not pd.isnull(d) else ''
        for d in df_art_table["addedonpy"]
    ]
    df_art_table["length"] = [len(text) for text in df_art_table["text"]]
    cursor.close()
    conn.close()
    return df_art_table
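#Note: the block below assumes df_top_ents_table_date_simple_sum has already
#been built further up the pipeline. One plausible construction (an assumption,
#not the original code) weights each entity's relevance score by the article's
#model score and sums per entity name:
# df_top = get_top_ents()
# df_top['full_ent_score'] = df_top['score'] * df_top['model_score']
# df_top_ents_table_date_simple_sum = (
#     df_top.groupby('name', as_index=False)['full_ent_score'].sum())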
#Sort by aggregate entity score: descending for the top positive entities,
#ascending for the top negative entities
df_top_ents_table_date_simple_sum_sorted_desc = df_top_ents_table_date_simple_sum.sort_values(
    'full_ent_score', ascending=False)
df_top_ents_table_date_simple_sum_sorted_asc = df_top_ents_table_date_simple_sum.sort_values(
    'full_ent_score', ascending=True)

#Take the top 5 of each
ents_top5_pos = df_top_ents_table_date_simple_sum_sorted_desc[:5]
ents_top5_neg = df_top_ents_table_date_simple_sum_sorted_asc[:5]

#Trim away the columns we don't need
ents_top5_pos_trim = ents_top5_pos[['name', 'full_ent_score']]
ents_top5_neg_trim = ents_top5_neg[['name', 'full_ent_score']]

#Replace the contents of both leaderboard tables in the DB
conn, cursor = get_conn_info()
cursor.execute("TRUNCATE backend_toppositiveentities;")
cursor.execute("TRUNCATE backend_topnegativeentities;")
for index, row in ents_top5_pos_trim.iterrows():
    cursor.execute(
        "INSERT INTO backend_toppositiveentities (name, score) VALUES (%s, %s);",
        (row['name'], row['full_ent_score']))
for index, row in ents_top5_neg_trim.iterrows():
    cursor.execute(
        "INSERT INTO backend_topnegativeentities (name, score) VALUES (%s, %s);",
        (row['name'], row['full_ent_score']))
conn.commit()
cursor.close()
conn.close()
print("Top ents done")
print(ents_top5_pos_trim)
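#Alternative sketch (an assumption, not the existing code): the row-by-row
#inserts above could be batched with the DB-API's executemany, e.g.:
# cursor.executemany(
#     "INSERT INTO backend_toppositiveentities (name, score) VALUES (%s, %s);",
#     list(ents_top5_pos_trim.itertuples(index=False, name=None)))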
def get_aylien(df_sample):
    conn, cursor = get_conn_info()
    #Sometimes we need to cycle accounts to get enough units, however this is done manually
    # c = textapi.Client("f40063af", "ddd790cf8730e5934f0a416f4ac44b2a")  #paul
    # c = textapi.Client("0ae83fa3", "9b5206bca074e0c2cc55aa055aac92d0")  #ed
    c = textapi.Client("b08da84e", "eb6d7a1184b9aaf845646e66a513429f")  #ty

    loop_count = 0
    #The argument for this function is a dataframe with all the articles we want
    #to get the Aylien sentiment details for.
    for index, row in df_sample.iterrows():
        #Report the progress every 5 articles
        if loop_count % 5 == 0 or loop_count == len(df_sample):
            print("{} of {} articles processed".format(loop_count, len(df_sample)))

        #Split the article text into sentences, then into upper and lower halves
        lines_list = tokenizer.tokenize(row['text'])
        title = row['title']
        section1_joined = " ".join(lines_list[:len(lines_list) // 2])
        section2_joined = " ".join(lines_list[(-len(lines_list) // 2):])

        #Score each half and the title; delay requests to ensure we adhere to
        #Aylien's rate limits
        aresp = c.Sentiment(section1_joined)
        time.sleep(1)
        s1_norm = aresp['polarity']
        s1_norm_conf = aresp['polarity_confidence']

        aresp = c.Sentiment(section2_joined)
        time.sleep(1)
        s2_norm = aresp['polarity']
        s2_norm_conf = aresp['polarity_confidence']

        aresp = c.Sentiment(title)
        time.sleep(1)
        stitle = aresp['polarity']
        stitle_conf = aresp['polarity_confidence']

        #We want to insert into the DB based on the uniqueid
        uniqueid = row['uniqueid']
        cursor.execute(
            """UPDATE backend_sentiment
               SET aylien_paragraph1_sentiment = %s,
                   aylien_paragraph2_sentiment = %s,
                   aylien_title_sentiment = %s,
                   aylien_paragraph1_confidence = %s,
                   aylien_paragraph2_confidence = %s,
                   aylien_title_confidence = %s
               WHERE article = %s;""",
            (s1_norm, s2_norm, stitle, s1_norm_conf, s2_norm_conf,
             stitle_conf, uniqueid))
        cursor.execute(
            "UPDATE backend_article SET aylien_sentiment_adv = %s WHERE uniqueid = %s;",
            (True, uniqueid))

        #Keep an eye on the amount of requests we have done
        loop_count += 1

    conn.commit()
    cursor.close()
    conn.close()
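#Minimal usage sketch (an assumption, not part of the original file). Select
#articles whose aylien_sentiment_adv flag is still unset and run them through
#get_aylien in a small batch to conserve API units.
# df_art = get_art_table()
# df_todo = df_art[df_art['aylien_sentiment_adv'] == False]
# get_aylien(df_todo.head(10))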