Example #1
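All the examples below share the same helpers: get_conn_info() returns an open
database connection plus a cursor, pd is pandas, and sql is pandas.io.sql. A
minimal sketch of that shared setup, assuming the backend_* tables live in
PostgreSQL and that the connection details shown are placeholders:

import pandas as pd
import pandas.io.sql as sql
import psycopg2


def get_conn_info():
    #Hypothetical helper: open a PostgreSQL connection and hand back
    #both the connection and a cursor, as the examples expect
    conn = psycopg2.connect(dbname="articles", user="django",
                            password="changeme", host="localhost")
    return conn, conn.cursor()
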
def get_ent_norm_table():
    conn, cursor = get_conn_info()

    df_ent_table_norm = sql.read_sql(
        """SELECT backend_entitiesnormalized.article,
                  backend_article.addedon,
                  backend_article.url,
                  backend_article.source,
                  backend_entitiesnormalized.score,
                  backend_article.text,
                  backend_entitiesnormalized.name
           FROM backend_entitiesnormalized
           INNER JOIN backend_article
           ON backend_article.uniqueid = backend_entitiesnormalized.article;""",
        conn)

    df_ent_table_norm["addedonpy"] = [
        d.to_pydatetime() for d in df_ent_table_norm["addedon"]
    ]
    df_ent_table_norm["addedon"] = [
        d.strftime('%d_%m_%Y') if not pd.isnull(d) else ''
        for d in df_ent_table_norm["addedonpy"]
    ]
    df_ent_table_norm["length"] = [
        len(text) for text in df_ent_table_norm["text"]
    ]

    cursor.close()
    conn.close()
    return df_ent_table_norm
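
The per-row list comprehensions above work, but pandas can do the same
conversions in vectorized form. A sketch of an equivalent (not the project's
actual code; addedon_str is a made-up column name to avoid clobbering addedon):

df = get_ent_norm_table()
#dt.strftime leaves missing dates as NaN rather than '', so fill afterwards
df["addedon_str"] = pd.to_datetime(df["addedonpy"]).dt.strftime('%d_%m_%Y').fillna('')
df["length"] = df["text"].str.len()
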
Example #2
def get_flag_data():
    conn, cursor = get_conn_info()

    df_flag_table = sql.read_sql(
        """SELECT backend_article.uniqueid,
                  backend_article.analyzed,
                  backend_article.nltk_sentiment,
                  backend_article.aylien_sentiment_adv
           FROM backend_article;""", conn)

    cursor.close()
    conn.close()
    return df_flag_table
Example #3
def get_mod_data():
    conn, cursor = get_conn_info()

    df_mod_table = sql.read_sql(
        """SELECT backend_randomarticleuserrating.uniqueid_id AS ArticleID,
                  backend_randomarticleuserrating.score AS UserScore,
                  backend_sentiment.watson_score,
                  backend_sentiment.aylien_polarity,
                  backend_sentiment.aylien_confidence,
                  backend_sentiment.aylien_paragraph1_sentiment,
                  backend_sentiment.aylien_paragraph1_confidence,
                  backend_sentiment.aylien_paragraph2_sentiment,
                  backend_sentiment.aylien_paragraph2_confidence,
                  backend_sentiment.aylien_title_sentiment,
                  backend_sentiment.aylien_title_confidence,
                  backend_sentiment.nltk_paragraph1_sentiment,
                  backend_sentiment.nltk_paragraph2_sentiment,
                  backend_sentiment.nltk_title_sentiment,
                  backend_sentiment.nltk_combined_sentiment,
                  backend_sentiment.model_score,
                  backend_emotion.anger,
                  backend_emotion.disgust,
                  backend_emotion.fear,
                  backend_emotion.joy,
                  backend_emotion.sadness
           FROM backend_randomarticleuserrating, backend_sentiment, backend_emotion
           WHERE backend_randomarticleuserrating.uniqueid_id = backend_sentiment.article
             AND backend_randomarticleuserrating.uniqueid_id = backend_emotion.article;""",
        conn)
    cursor.close()
    conn.close()
    #Two versions of the table are returned: the full one and one where the
    #articles are grouped by the mean of the user ratings
    df_mod_table_simple = df_mod_table[[
        'articleid', 'watson_score', 'nltk_combined_sentiment',
        'nltk_title_sentiment', 'model_score', 'userscore'
    ]]
    df_mod_table_simple_mean = pd.DataFrame(
        df_mod_table_simple.groupby(
            ['articleid', 'watson_score', 'nltk_combined_sentiment',
             'nltk_title_sentiment', 'model_score'],
            as_index=False)['userscore'].mean())

    return df_mod_table, df_mod_table_simple_mean
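
A quick usage sketch (hypothetical, just to show the two return values): the
full table keeps one row per individual user rating, while the grouped frame
has one row per article with the mean user score.

df_mod_full, df_mod_mean = get_mod_data()
print(len(df_mod_full), len(df_mod_mean))  #the grouped frame is usually shorter
print(df_mod_mean[['articleid', 'userscore']].head())
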
Example #4
def get_url_table():
    conn, cursor = get_conn_info()
    df_url_table = sql.read_sql(
        """SELECT backend_article.uniqueid,
                  backend_article.url,
                  backend_article.addedon,
                  backend_article.sumanalyzed
           FROM backend_article;""", conn)

    df_url_table["addedonpy"] = [
        d.to_pydatetime() for d in df_url_table["addedon"]
    ]
    df_url_table["addedon"] = [
        d.strftime('%d_%m_%Y') if not pd.isnull(d) else ''
        for d in df_url_table["addedonpy"]
    ]
    cursor.close()
    conn.close()
    return df_url_table
Example #5
def get_top_ents():
    conn, cursor = get_conn_info()
    df_top_ents_table = sql.read_sql(
        """SELECT backend_article.addedon,
                  backend_entitiesnormalized.name,
                  backend_entitiesnormalized.score,
                  backend_entitiesnormalized.article,
                  backend_sentiment.model_score
           FROM backend_sentiment, backend_article, backend_entitiesnormalized
           WHERE backend_article.uniqueid = backend_entitiesnormalized.article
             AND backend_article.uniqueid = backend_sentiment.article;""",
        conn)

    df_top_ents_table["addedonpy"] = [
        d.to_pydatetime() for d in df_top_ents_table["addedon"]
    ]
    df_top_ents_table["addedondt"] = [
        d.to_pydatetime().date() for d in df_top_ents_table["addedon"]
    ]
    cursor.close()
    conn.close()
    return df_top_ents_table
Example #6
def get_art_table():
    conn, cursor = get_conn_info()
    df_art_table = sql.read_sql(
        """SELECT backend_article.uniqueid,
                  backend_article.addedon,
                  backend_article.url,
                  backend_article.title,
                  backend_article.source,
                  backend_article.modelprocessed,
                  backend_sentiment.watson_score,
                  backend_sentiment.nltk_combined_sentiment,
                  backend_sentiment.nltk_title_sentiment,
                  backend_article.entitynormalized,
                  backend_article.nltk_sentiment,
                  backend_article.aylien_sentiment_adv,
                  backend_article.text
           FROM backend_article
           INNER JOIN backend_sentiment
           ON backend_article.uniqueid = backend_sentiment.article;""",
        conn)

    df_art_table["addedonpy"] = [
        d.to_pydatetime() for d in df_art_table["addedon"]
    ]
    df_art_table["addedondt"] = [
        d.to_pydatetime().date() for d in df_art_table["addedon"]
    ]
    df_art_table["addedon"] = [
        d.strftime('%d_%m_%Y') if not pd.isnull(d) else ''
        for d in df_art_table["addedonpy"]
    ]
    df_art_table["length"] = [len(text) for text in df_art_table["text"]]

    cursor.close()
    conn.close()
    return df_art_table
Example #7
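Example #7 assumes a frame df_top_ents_table_date_simple_sum with 'name' and
'full_ent_score' columns already exists. One plausible way to build it from
get_top_ents() above -- purely an illustration, not the project's actual
aggregation:

df_top_ents_table = get_top_ents()
#assumption: weight each entity mention by the article's model sentiment
#score, then sum per entity name
df_top_ents_table["full_ent_score"] = (
    df_top_ents_table["score"] * df_top_ents_table["model_score"])
df_top_ents_table_date_simple_sum = df_top_ents_table.groupby(
    "name", as_index=False)["full_ent_score"].sum()
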
#sort descending for the most positive entities, ascending for the most negative
df_top_ents_table_date_simple_sum_sorted_desc = df_top_ents_table_date_simple_sum.sort_values('full_ent_score', ascending=False)
df_top_ents_table_date_simple_sum_sorted_asc = df_top_ents_table_date_simple_sum.sort_values('full_ent_score', ascending=True)

#take the top 5 from each end
ents_top5_pos = df_top_ents_table_date_simple_sum_sorted_desc[:5]
ents_top5_neg = df_top_ents_table_date_simple_sum_sorted_asc[:5]

#trim away the stuff we don't need
ents_top5_pos_trim = ents_top5_pos[['name', 'full_ent_score']]
ents_top5_neg_trim = ents_top5_neg[['name', 'full_ent_score']]


#send both lists to the DB
conn, cursor = get_conn_info()
cursor.execute("truncate backend_toppositiveentities;")
cursor.execute("truncate backend_topnegativeentities;")

for index, row in ents_top5_pos_trim.iterrows():
    cursor.execute(
        "INSERT INTO backend_toppositiveentities (name, score) VALUES (%s, %s);",
        (row['name'], row['full_ent_score']))

for index, row in ents_top5_neg_trim.iterrows():
    cursor.execute(
        "INSERT INTO backend_topnegativeentities (name, score) VALUES (%s, %s);",
        (row['name'], row['full_ent_score']))


conn.commit()
cursor.close()
conn.close()
print("Top ents done")
print(ents_top5_pos_trim)
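
The row-by-row inserts above are fine for five rows; for larger batches the
usual DB-API pattern is executemany over a list of tuples. A sketch (would run
before the commit above):

rows = list(ents_top5_pos_trim.itertuples(index=False, name=None))
cursor.executemany(
    "INSERT INTO backend_toppositiveentities (name, score) VALUES (%s, %s);",
    rows)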
Example #8
#Assumed dependencies (not in the original snippet): the Aylien Text API client and an NLTK sentence tokenizer
import time
from aylienapiclient import textapi
import nltk.data
tokenizer = nltk.data.load('tokenizers/punkt/english.pickle')

def get_aylien(df_sample):
    conn, cursor = get_conn_info()
    #Sometimes we need to cycle accounts to get enough units; this is done manually
    #   c = textapi.Client("f40063af", "ddd790cf8730e5934f0a416f4ac44b2a") #paul
    #   c = textapi.Client("0ae83fa3", "9b5206bca074e0c2cc55aa055aac92d0") #ed
    c = textapi.Client("b08da84e", "eb6d7a1184b9aaf845646e66a513429f")  #ty

    loop_count = 0

    #The argument for this function is a dataframe with all the articles we want to get the Aylien
    #sentiment details for.
    for index, row in df_sample.iterrows():

        #Report the progress every 5 articles
        if loop_count % 5 == 0 or loop_count == len(df_sample):
            print("{} of {} articles processed".format(loop_count,
                                                       len(df_sample)))

        #Split the text into sentences and count them
        lines_list = tokenizer.tokenize(row['text'])
        title = row['title']
        num_sents = len(lines_list)

        #Get the text sections for the upper and lower halves of the article
        section1 = lines_list[:num_sents // 2]
        section1_joined = " ".join(section1)
        section2 = lines_list[-num_sents // 2:]
        section2_joined = " ".join(section2)

        aresp = c.Sentiment(section1_joined)
        #need to delay requests to ensure we adhere to Aylien requirements
        time.sleep(1)
        #extract the info from the response
        s1_norm = aresp['polarity']
        s1_norm_conf = aresp['polarity_confidence']
        aresp = c.Sentiment(section2_joined)
        time.sleep(1)
        s2_norm = aresp['polarity']
        s2_norm_conf = aresp['polarity_confidence']
        aresp = c.Sentiment(title)
        time.sleep(1)
        stitle = aresp['polarity']
        stitle_conf = aresp['polarity_confidence']

        #We want to update the DB based on the uniqueID
        uniqueid = row['uniqueid']
        cursor.execute(
            """UPDATE backend_sentiment
               SET aylien_paragraph1_sentiment = %s,
                   aylien_paragraph2_sentiment = %s,
                   aylien_title_sentiment = %s,
                   aylien_paragraph1_confidence = %s,
                   aylien_paragraph2_confidence = %s,
                   aylien_title_confidence = %s
               WHERE article = %s;""",
            (s1_norm, s2_norm, stitle, s1_norm_conf, s2_norm_conf,
             stitle_conf, uniqueid))

        cursor.execute(
            "UPDATE backend_article SET aylien_sentiment_adv = %s WHERE uniqueid = %s;",
            (True, uniqueid))
        #keep an eye on the number of requests we have done
        loop_count += 1

    conn.commit()
    cursor.close()
    conn.close()
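
A hypothetical way to drive get_aylien: pull the article table, keep the rows
that have not been through the advanced Aylien pass yet, and process a small
batch at a time so the account's credit limit is respected.

df_art = get_art_table()
df_todo = df_art[df_art["aylien_sentiment_adv"] != True]
get_aylien(df_todo.head(10))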