示例#1
0
    def word_similarity_graph(self, dataframe, word):
        """Render a scattertext word-similarity explorer and save it as HTML.

        A corpus is built from ``dataframe`` using the 'Document Type' column
        as the category and the 'Text' column as the document text, parsed
        with ``self.nlp``. The resulting page is written, UTF-8 encoded, to
        ``self.term_file``.

        Args:
            dataframe: Table passed to ``st.CorpusFromPandas``; it must have
                'Document Type' and 'Text' columns. The explorer is asked for
                the '1st Document' category, so presumably that value occurs
                in 'Document Type' -- verify against the caller.
            word: Term passed as ``target_term`` to the explorer.
        """
        corpus = st.CorpusFromPandas(
            dataframe,
            category_col='Document Type',
            text_col='Text',
            nlp=self.nlp,
        ).build()

        html = word_similarity_explorer(
            corpus,
            category='1st Document',
            category_name='1st Document',
            not_category_name='2nd Document',
            target_term=word,
            minimum_term_frequency=5,
            pmi_threshold_coefficient=4,
            width_in_pixels=1000,
            alpha=0.01,
            max_p_val=0.05,
            save_svg_button=True,
        )
        logger.getLogger().info("Opening Word Similarity Visual")
        # Use a context manager so the handle is flushed and closed even if
        # the write fails; the original left the file object to the GC.
        with open(self.term_file, 'wb') as out_file:
            out_file.write(html.encode('utf-8'))
示例#2
0
def main():
    """Build the 2012 convention demo and save a word-similarity page.

    Loads the ``en_core_web_sm`` spaCy model, builds a corpus from the
    sample convention data (category column 'party', text column 'text'),
    renders the word-similarity explorer for the term 'jobs' with
    'democrat' as the focus category, and writes the HTML to
    ``./demo_similarity.html``.
    """
    output_path = './demo_similarity.html'

    nlp = spacy.load('en_core_web_sm')
    convention_df = SampleCorpora.ConventionData2012.get_data()
    corpus = CorpusFromPandas(convention_df,
                              category_col='party',
                              text_col='text',
                              nlp=nlp).build()
    html = word_similarity_explorer(corpus,
                                    category='democrat',
                                    category_name='Democratic',
                                    not_category_name='Republican',
                                    target_term='jobs',
                                    minimum_term_frequency=5,
                                    width_in_pixels=1000,
                                    metadata=convention_df['speaker'],
                                    alpha=0.01,
                                    max_p_val=0.1,
                                    save_svg_button=True)
    # Context manager guarantees the file is flushed and closed.
    with open(output_path, 'wb') as out_file:
        out_file.write(html.encode('utf-8'))
    # Build the hint from the same path that was written; the original
    # message misspelled the file name ("similarlity").
    print('Open {} in Chrome or Firefox.'.format(output_path))
def main():
    """Render a word-similarity explorer for the Metformin review data.

    Review records come from
    ``utils_data.get_all_satisfaction_score_comment_in_all_conds()``. Per the
    original author's notes, each record looks like
    ``['negative', 'Satisfaction', '<free-text review>']`` and there were
    1402 of them. The records are wrapped in a DataFrame with the columns
    'senti_on_Metfor_oral', 'feature' and 'review', turned into a scattertext
    corpus (category: 'senti_on_Metfor_oral', text: 'review'), and the
    explorer HTML is written to a hard-coded absolute path.
    """
    output_path = (
        '/mnt/1T-5e7/mycodehtml/Data_mining/Visualization/Scattertext/demo_similarity.html'
    )

    # NOTE(review): the 'en' shortcut name is reportedly no longer accepted by
    # spaCy 3.x (which expects e.g. 'en_core_web_sm') -- confirm against the
    # installed spaCy version before changing.
    nlp = spacy.load('en')

    # ================================================================================
    # Load the raw [sentiment, feature, review] records.
    review_records = utils_data.get_all_satisfaction_score_comment_in_all_conds()

    # ================================================================================
    columns = ['senti_on_Metfor_oral', 'feature', 'review']
    reviews_df = pd.DataFrame(review_records, index=None, columns=columns)

    # ================================================================================
    corpus = CorpusFromPandas(reviews_df,
                              category_col='senti_on_Metfor_oral',
                              text_col='review',
                              nlp=nlp).build()

    # ================================================================================
    # NOTE(review): target_term='jobs' looks carried over from the political
    # convention demo; presumably a term from the review vocabulary is
    # intended here -- TODO confirm.
    html = word_similarity_explorer(
        corpus,
        category='negative',
        category_name='Negative',
        not_category_name='Positive',
        target_term='jobs',
        minimum_term_frequency=5,
        width_in_pixels=1000,
        metadata=reviews_df['feature'],
        alpha=0.01,
        max_p_val=0.1,
        save_svg_button=True)

    # ================================================================================
    # Context manager guarantees the file is flushed and closed.
    with open(output_path, 'wb') as out_file:
        out_file.write(html.encode('utf-8'))
    # Report the path that was actually written; the original message named a
    # misspelled relative file ('./demo_similarlity.html') that is never
    # created.
    print('Open {} in Chrome or Firefox.'.format(output_path))