Пример #1
0
def writefile2(Xtr, ytr, ntr, model, cvm, filer, rn):
    """Cross-validate ``model`` on the training set and write a QSAR-style report.

    Writes training-set statistics (R2, MAE, cross-validated Q2, rm2 metrics)
    to the open text handle ``filer`` and dumps per-sample prediction tables
    to CSV files whose names are built from the module-level prefix ``c_``
    and the run id ``rn``.

    Parameters:
        Xtr: pandas DataFrame of training descriptors.
        ytr: pandas DataFrame holding the training target; its first column
            name is used below to decide whether the module-level ``file2``
            contains observed activities (test set) or not (screening set).
        ntr: pandas DataFrame of training-sample identifiers (first column
            is the id used as the merge key).
        model: fitted estimator exposing ``predict``.
        cvm: number of cross-validation folds, forwarded to ``cv``.
        filer: writable text file handle receiving the report lines.
        rn: run identifier embedded in the output CSV file names.

    NOTE(review): depends on module-level globals ``file2``, ``c_``, ``reg``,
    ``a`` and on project helpers ``cv``, ``rm2``, ``apdom`` — confirm these
    are in scope wherever this function is used.
    """
    # cvv.fit() returns the fitted statistics plus ``ls``, which appears to
    # be a per-sample table (id, observed, predicted) — TODO confirm layout.
    cvv = cv(Xtr, ytr, ntr, model, cvm)
    r2, mae, q2lmo, rm2tr, drm2tr, ls = cvv.fit()
    dftr1 = pd.concat([ntr, Xtr], axis=1)
    #dftr2=ls.iloc[:,0:2]
    # Left-join the first three columns of ``ls`` onto the id+descriptor
    # table, keyed on the first column of ``ls`` (the sample id).
    dftr = pd.merge(dftr1,
                    ls.iloc[:, 0:3],
                    on=ls.iloc[:, 0:1].columns[0],
                    how='left')
    dftr.to_csv(str(c_) + str(rn) + "_trpr.csv", index=False)
    filer.write('R2: ' + str(r2) + "\n")
    filer.write(str(cvm) + '-fold cross-validated R2: ' + str(q2lmo) + "\n")
    filer.write('Mean absolute error: ' + str(mae) + "\n")
    filer.write('Rm2tr ' + str(rm2tr) + "\n")
    filer.write('Delta Rm2tr ' + str(drm2tr) + "\n")
    # If the external file carries a column named like the training target,
    # treat it as a labelled test set and compute external-validation stats.
    if ytr.columns[0] in file2.columns:
        Xts = file2[Xtr.columns]
        nts = file2.iloc[:, 0:1]
        yts = file2.iloc[:, 1:2]
        ytspr = pd.DataFrame(model.predict(Xts))
        ytspr.columns = ['Pred']
        rm2ts, drm2ts = rm2(yts, ytspr).fit()
        tsdf = pd.concat([yts, pd.DataFrame(ytspr)], axis=1)
        tsdf.columns = ['Active', 'Predict']
        # 'Aver'  = mean of TRAINING observations (used for Q2F1),
        # 'Aver2' = mean of TEST predictions (used for Q2F2 below).
        tsdf['Aver'] = ytr.values.mean()
        tsdf['Aver2'] = tsdf['Predict'].mean()
        tsdf['diff'] = tsdf['Active'] - tsdf['Predict']
        tsdf['diff2'] = tsdf['Active'] - tsdf['Aver']
        tsdf['diff3'] = tsdf['Active'] - tsdf['Aver2']
        maets = mean_absolute_error(tsdf['Active'], tsdf['Predict'])
        # Q2F1 / R2pred and Q2F2: 1 - PRESS / SS, with SS centred on the
        # training mean (Q2F1) or on the test-prediction mean (Q2F2).
        r2pr = 1 - ((tsdf['diff']**2).sum() / (tsdf['diff2']**2).sum())
        r2pr2 = 1 - ((tsdf['diff']**2).sum() / (tsdf['diff3']**2).sum())
        RMSEP = ((tsdf['diff']**2).sum() / tsdf.shape[0])**0.5
        dfts = pd.concat([nts, Xts, yts, ytspr], axis=1)
        dfts.to_csv(str(c_) + str(rn) + "_tspr.csv", index=False)
        filer.write("\n")
        filer.write('Test set results: ' + "\n")
        filer.write('Number of observations: ' + str(yts.shape[0]) + "\n")
        filer.write('MAEtest: ' + str(maets) + "\n")
        filer.write('Q2F1/R2Pred: ' + str(r2pr) + "\n")
        filer.write('Q2F2: ' + str(r2pr2) + "\n")
        filer.write('rm2test: ' + str(rm2ts) + "\n")
        filer.write('delta rm2test: ' + str(drm2ts) + "\n")
        filer.write('RMSEP: ' + str(RMSEP) + "\n")
        filer.write("\n")

    else:
        # No observed activities: treat ``file2`` as a screening set and
        # only write predictions (plus an applicability-domain check).
        Xts = file2.iloc[:, 1:]
        nts = file2.iloc[:, 0:1]
        # NOTE(review): this branch predicts with the global ``reg`` and the
        # global feature list ``a``, while the branch above uses the ``model``
        # argument — confirm the inconsistency is intentional.
        ytspr = pd.DataFrame(reg.predict(Xts[a]))
        ytspr.columns = ['Pred']
        adts = apdom(Xts[a], Xtr[a])
        # NOTE(review): ``yadts`` is computed but never written anywhere —
        # possibly meant to be included in the CSV below; verify.
        yadts = adts.fit()
        dfts = pd.concat([nts, Xts[a], ytspr], axis=1)
        dfts.to_csv(str(c_) + str(rn) + "_scpr.csv", index=False)
Пример #2
0
def writefile2(X,y,model,cvm,filerw):
    """Cross-validate a classifier and write confusion-matrix statistics.

    Runs the project ``cv`` helper on (X, y) with ``cvm`` folds, unpacks the
    eight resulting metrics, and appends one labelled line per metric to the
    open text handle ``filerw``.
    """
    cvv = cv(X, y, model, cvm)
    tp, tn, fp, fn, sens, spec, acc, f1 = cvv.fit()
    # One line per metric; labels kept exactly as in the original report
    # format (note: FP/FN labels have no colon).
    lines = (
        ('True Positive: ', tp),
        ('True Negative: ', tn),
        ('False Positive ', fp),
        ('False Negative ', fn),
        ('Sensitivity: ', sens),
        ('Specificity: ', spec),
        ('Accuracy: ', acc),
        ('f1_score: ', f1),
    )
    for label, value in lines:
        filerw.write(label + str(value) + "\n")
Пример #3
0
import numpy as np
from sklearn import cross_validation
from cross_validation import cv
from word2vec_fn import build_doc_vector
# '''
# NOTE(review): `load_embeddings`, `load_corpus`, `get_file_path`, `load_mark`
# and `gold_valence_arousal` are not imported in this snippet — presumably
# provided by the surrounding project; confirm before running standalone.
model = load_embeddings('CVAT_docvecs')
# Sanity checks on the loaded doc-vector model: one vector by integer index,
# one by string tag, and the vocabulary size.
print(model.docvecs[1])
print(model.docvecs['SENT_23'])
print(len(model.vocab.keys()))

corpus = load_corpus(get_file_path('cn_corpus'))
mark = load_mark(get_file_path('mark'))
# One dense vector per document, built from the embedding model.
vecs = build_doc_vector(corpus, model)

# Gold-standard affect ratings aligned with the corpus, then a
# cross-validated regression per target dimension.
valence, arousal = gold_valence_arousal(corpus, mark)
cv(vecs, valence, multivariant=True)
cv(vecs, arousal, multivariant=True)
# '''
# from save_data import dump_picle
# dump_picle(model.key(), get_file_path('words_in_wordvec'))
# print('ok')
#
# # print(model.most_similar(positive=['woman', 'king'], negative=['man'], topn=1))
# # print(model.doesnt_match("breakfast cereal dinner lunch".split()))
# # print(model.similarity('woman', 'man'))
# # print(model.most_similar_cosmul(positive=['baghdad', 'england'], negative=['london'], topn=10))
# # print(model.n_similarity(['sushi', 'shop'], ['japanese', 'restaurant']))
#
# from load_data import load_pickle
# words = load_pickle(get_file_path('words_in_wordvec'))
# print(words)
Пример #4
0
from affective_score_vader import screen_data
from load_data import load_anew

print('start')
# NOTE(review): `load_embeddings`, `load_vader`, `get_file_path`,
# `buill_word_vector` and `cv` are not imported in this snippet —
# presumably project-level helpers; confirm before running standalone.
model = load_embeddings('google_news')

# --- Run 1: all four VADER sub-corpora --------------------------------
corpus, ratings = load_vader(['tweets', 'movie_reviews', 'product_reviews', 'news_articles'])
lexicon_name = get_file_path('anew')
words, valences, _ = load_anew(lexicon_name)
# Keep only the samples whose words are covered by the ANEW lexicon.
corpus, ratings = screen_data(corpus, ratings, words)
# Shift every rating by +5 so the values land in the 0..9 bins below.
ratings = np.array(ratings) + np.ones(len(ratings), dtype=float) * 5
print(np.histogram(ratings, bins=range(10)))
print(len(model.vocab.keys()))
# NOTE(review): `buill_word_vector` looks like a typo for
# `build_word_vector` — confirm which name the project actually defines.
vecs = np.concatenate([buill_word_vector(text, model, size=300) for text in corpus])
print(vecs[1])
cv(vecs, ratings, multivariant=True)

# Drop references to the large arrays before the second run to free memory.
vecs = None
ratings = None
# --- Run 2: tweets only (same pipeline, duplicated verbatim) ----------
corpus, ratings = load_vader(['tweets'])
lexicon_name = get_file_path('anew')
words, valences, _ = load_anew(lexicon_name)
corpus, ratings = screen_data(corpus, ratings, words)
ratings = np.array(ratings) + np.ones(len(ratings), dtype=float) * 5
print(np.histogram(ratings, bins=range(10)))
print(len(model.vocab.keys()))
vecs = np.concatenate([buill_word_vector(text, model, size=300) for text in corpus])
print(vecs[1])
cv(vecs, ratings, multivariant=True)

vecs = None