Пример #1
0
        term1topN_tf = term1topN_tf[term1topN_tf['Speaker Party'].isin(
            parties4)]
        dfbypartyspeaker = dfbypartyspeaker[
            dfbypartyspeaker['Speaker Party'].isin(parties4)]
    elif N == 2:
        term1topN_tf = term1topN_tf[term1topN_tf['Speaker Party'].isin(
            parties2)]
        dfbypartyspeaker = dfbypartyspeaker[
            dfbypartyspeaker['Speaker Party'].isin(parties2)]
    # %% build the per-party/speaker table for the selected phrases
    # m.select_phrases_from_df2 receives the per-party/speaker frame, the
    # top-N phrase table and the two grouping columns; it returns the
    # resulting table together with ``topN``.
    term1_topN_bySpeakerParty, topN = m.select_phrases_from_df2(
        dfbypartyspeaker,
        term1topN_tf,
        ['Speaker Party', 'Speaker'],
    )

    # %% two make_share variants: the default call and scale=False
    term1_topN_bySpeakerParty_scaled = m.make_share(
        term1_topN_bySpeakerParty)
    term1_topN_bySpeakerParty_share = m.make_share(
        term1_topN_bySpeakerParty, scale=False)

    # %% save result
    # The three output paths are consecutive command-line arguments that
    # start at this index (raw table, then scaled, then share).
    out_base = l_in * 2 + 4 + (i - 1) * 5
    print(sys.argv[out_base])
    for out_offset, out_table in enumerate((
            term1_topN_bySpeakerParty,
            term1_topN_bySpeakerParty_scaled,
            term1_topN_bySpeakerParty_share)):
        # sys.argv is indexed only when the table is about to be written,
        # so a missing argument fails at the same point as before.
        out_table.to_csv(sys.argv[out_base + out_offset])

    # %% save interim results
    if len(fixed_phrases) > 8:
        term1_tf = pd.DataFrame()
Пример #2
0
# %% keep only phrases mentioned at least 100 times (original note: -> 991 w)
dfoverall5cap100 = dfoverall5.loc[dfoverall5['Counts'] >= 100]

# %% cap20: per-party/speaker table built from the cap20 phrase list
term5_cap20_bySpeakerParty = m.select_phrases_from_df(
    dfbypartyspeaker5,
    dfoverall5cap20,
    ['Speaker Party', 'Speaker'],
)
term5_cap20_bySpeakerParty.shape  # shown as cell output when run interactively

# %% cap50: same call with the cap50 phrase list
term5_cap50_bySpeakerParty = m.select_phrases_from_df(
    dfbypartyspeaker5,
    dfoverall5cap50,
    ['Speaker Party', 'Speaker'],
)
term5_cap50_bySpeakerParty.shape

# %% cap100: same call with the cap100 phrase list
term5_cap100_bySpeakerParty = m.select_phrases_from_df(
    dfbypartyspeaker5,
    dfoverall5cap100,
    ['Speaker Party', 'Speaker'],
)
term5_cap100_bySpeakerParty.shape


# Every table below gets two make_share variants: the default call
# ("scaled") and scale=False ("share").

# %% tfidf top 500
term5_top500_bySpeakerParty_scaled = m.make_share(
    term5_top500_bySpeakerParty)
term5_top500_bySpeakerParty_share = m.make_share(
    term5_top500_bySpeakerParty, scale=False)

# %% tfidf top 1000
term5_top1000_bySpeakerParty_scaled = m.make_share(
    term5_top1000_bySpeakerParty)
term5_top1000_bySpeakerParty_share = m.make_share(
    term5_top1000_bySpeakerParty, scale=False)

# %% cap20
term5_cap20_scaled = m.make_share(term5_cap20_bySpeakerParty)
term5_cap20_share = m.make_share(term5_cap20_bySpeakerParty, scale=False)

# %% cap50
term5_cap50_scaled = m.make_share(term5_cap50_bySpeakerParty)
term5_cap50_share = m.make_share(term5_cap50_bySpeakerParty, scale=False)

# %% cap100
term5_cap100_scaled = m.make_share(term5_cap100_bySpeakerParty)
term5_cap100_share = m.make_share(term5_cap100_bySpeakerParty, scale=False)
Пример #3
0
        parties = parties[:n]
        dfbypartyspeaker = dfbypartyspeaker[dfbypartyspeaker['Speaker Party'].isin(parties)]

    # Drop every row whose phrase is contained in pp1 or pp2 (both defined
    # outside this excerpt). ``~`` negates the boolean mask in one vectorised
    # step instead of calling a Python lambda per row, and ``.copy()`` makes
    # the result an independent frame so that the column assignment below
    # does not write into a slice of ``dfbypartyspeaker`` (the pattern that
    # triggers pandas' SettingWithCopyWarning).
    dfbypartyspeaker_filt = dfbypartyspeaker[
        ~dfbypartyspeaker.Phrase.isin(pp1.union(pp2))].copy()
    #%%
    # Attach to each row the summed Counts of its phrase over all remaining
    # rows, then keep only phrases whose total reaches the threshold N.
    dfbypartyspeaker_filt['TotalCounts'] = (
        dfbypartyspeaker_filt.groupby('Phrase')['Counts'].transform('sum'))
    dfbypartyspeaker_filt = dfbypartyspeaker_filt[
        dfbypartyspeaker_filt.TotalCounts >= N]
    # %%

    # Wide table: one row per (Speaker Party, Speaker), one column per phrase.
    # NOTE(review): pivot_table aggregates duplicate (party, speaker, phrase)
    # rows with the mean by default; if such duplicates can occur and a sum
    # is intended, pass aggfunc='sum' -- confirm against the input data.
    dftable = dfbypartyspeaker_filt.pivot_table(
        index=['Speaker Party', 'Speaker'],
        columns='Phrase',
        values='Counts')

    # Phrases a speaker never used become 0; the index levels are turned back
    # into ordinary columns before the table is passed on.
    dftable = dftable.fillna(0).reset_index()

    # Two make_share variants: the default call and scale=False.
    dftable_scaled = m.make_share(dftable)
    dftable_share = m.make_share(dftable, scale=False)

    # Output paths are three consecutive command-line arguments per iteration.
    dftable.to_csv(sys.argv[in_len+3+i*3])
    dftable_scaled.to_csv(sys.argv[in_len+4+i*3])
    dftable_share.to_csv(sys.argv[in_len+5+i*3])

# dfbypartyspeaker = pd.read_pickle('../../interim/t5_byPartySpeaker.pkl')
#%%


#%%

# pp2 = pd.read_pickle('procedural_phrases_SpSvpDistinct.pkl')

import pandas as pd
import sys
sys.path.append('../../Modules')
import modules as m
import ast

# Quick interactive check of m.make_share(scale=False) on a two-row toy frame.
d = pd.DataFrame({
    'Speaker': [1, 2],
    'Speaker Party': [2, 2],
    'a': [1, 2],
    'b': [2, 2],
})
d
m.make_share(d, scale=False)

# Frame loaded from the interim pickle; it is filtered by party below.
dfbypartyspeaker = pd.read_pickle(
    '../../../interim/all_byPartySpeakerTerm.pkl')

# Phrase lists: each CSV's ``Phrase`` column is parsed cell by cell with
# ast.literal_eval, leaving a Series of the evaluated values.
phrases = pd.read_csv(
    '../../../interim/fixd_P2/phrases_all_terms_tfidf_top500each_P2.csv'
).Phrase.apply(ast.literal_eval)
phrases4 = pd.read_csv(
    '../../../interim/fixd_P4/phrases_all_terms_tfidf_top250each_P4.csv'
).Phrase.apply(ast.literal_eval)

phrasesALL = pd.read_csv(
    '../../../interim/fixed/phrases_all_terms_tfidf_top100each.csv'
).Phrase.apply(ast.literal_eval)


# Party labels as they appear in the 'Speaker Party' column.
p1 = 'Sozialdemokratische Partei der Schweiz (SP)'
p2 = 'Schweizerische Volkspartei (SVP)'
p3 = 'FDP.Die Liberalen (FDP-Liberale)'
p4 = 'Christlichdemokratische Volkspartei der Schweiz (CVP)'
parties = [p1, p2, p3, p4]
parties2 = parties[:2]  # the first two labels only (p1 and p2)

# Row subsets restricted to the two-party and the four-party selection.
dfbypartyspeaker2 = dfbypartyspeaker.loc[
    dfbypartyspeaker['Speaker Party'].isin(parties2)]
dfbypartyspeaker4 = dfbypartyspeaker.loc[
    dfbypartyspeaker['Speaker Party'].isin(parties)]