#mod_number = 5, mod_value = 1, date = '09302015', with_date = True)

#all_target_test_Bow_list_dic, all_target_pred_Bow_list_dic = evaluate_with_SVM_3_k_fold(BOW_vecdic,toptarget_dic, newl_dic)

# Evaluate a LinearSVC on bag-of-words vectors once per key of newl_dic,
# using an equal number of -1 and +1 labelled samples, and keep every
# result tuple in k_bow_dic.
k_bow_dic = {}
for dic_key in newl_dic:
    newl = np.array(newl_dic[dic_key])
    target = np.array(toptarget_dic[dic_key])
    # Entries whose label is non-zero; not read again in this loop.
    newl_zero_one = newl[target != 0]
    # Size of the smaller of the two classes (-1 and +1).
    length = min(len(target[target == label]) for label in (-1, 1))
    # First `length` samples of class -1 followed by first `length` of +1.
    target_balanced = np.concatenate(
        (target[target == -1][:length], target[target == 1][:length]))
    # Relabel -1 as 0 so the labels become {0, 1}.
    target_balanced[target_balanced == -1] = 0
    newl_balanced = np.concatenate(
        (newl[target == -1][:length], newl[target == 1][:length]))
    # One BOW vector per selected entry, looked up in BOW_vecdic.
    BOW_vec_Mat = np.array([np.asarray(BOW_vecdic[key]) for key in newl_balanced])
    k_bow = yahoo_data_preprocess_func.evaluate_data(
        BOW_vec_Mat, target_balanced, clf=svm.LinearSVC())
    k_bow_dic[dic_key] = k_bow
    # evaluate_data's result unpacks into (true labels, predictions, classifiers).
    y_true_all, y_pred_all, clf_list = k_bow
    print(classification_report(y_true_all, y_pred_all, digits=4))
    print(accuracy_score(y_true_all, y_pred_all))

# Print the stored BOW results again, this time labelled with their key.
for dic_key, (y_true_all, y_pred_all, clf_list) in k_bow_dic.items():
    print(dic_key)
    print(classification_report(y_true_all, y_pred_all, digits=4))
    print(accuracy_score(y_true_all, y_pred_all))

# Predict with CDR
k_CDR_dic = {}
for dic_key in newl_dic:
    #dic_key = 'close_previousday_to_close_nextday'
# Smooth the vector y through the matrix W:
#   f = (lambda_ * L + I)^-1 . y,  with  L = diag(column sums of W) - W
# NOTE(review): L has the form of a graph Laplacian if W is a symmetric
# weight/adjacency matrix -- confirm against where W is built.
lambda_ = 0.1
L_mat = np.diag(W.sum(axis=0)) - W
F_func = np.linalg.inv(np.eye(len(L_mat)) + lambda_ * L_mat)
f = np.dot(F_func, y)
#for index, name in enumerate(limitNammelist):
	#try:
		#print pne2[name],name, y[index], f[index]
	#except:
		#continue

# Keep only the columns of the BOW matrix whose column sum exceeds 10.
BowMat_limit = NewtopdocveccategoryMat_BOW.T[NewtopdocveccategoryMat_BOW.sum(axis=0) > 10].T
# Stack f.T on 10000 identical rows so it can be multiplied element-wise
# with the BOW matrix (an earlier variant multiplied by f directly).
f_Mat = np.dot(np.ones((10000, 1)), f.T)
BowMat_gragh_polarity = BowMat_limit * f_Mat
# Labels: 5000 samples of class 0 followed by 5000 samples of class 1.
target = np.repeat([0, 1], 5000)

# Evaluate the f-weighted features first, then the plain filtered BOW
# features, printing the same three reports for each.
for feature_mat in (BowMat_gragh_polarity, BowMat_limit):
    y_true_all, y_pred_all, clf_list = yahoo_data_preprocess_func.evaluate_data(
        feature_mat, target, clf=svm.LinearSVC())
    print(confusion_matrix(y_true_all, y_pred_all))
    print(classification_report(y_true_all, y_pred_all, digits=4))
    print(accuracy_score(y_true_all, y_pred_all))


# Wrap the transposed y, cast to float32, in a chainer Variable.
val_y = chainer.Variable(y.T.astype(np.float32))

class graph_polarity_metohd(Chain):
    def __init__(self):
        super(graph_polarity_metohd, self).__init__(
Exemplo n.º 3
0
# Load the pre-computed word2vec dictionary for the chosen dimensionality.
DimentionN = 1000
# The file is opened in a `with` block so the handle is closed as soon as
# the pickle has been read (it used to be left open).
# NOTE(review): the mode is deliberately left as "r" to match how this file
# has been read so far; pickles written with protocol >= 1 need "rb" --
# confirm how the .pkl file was produced.
# NOTE(review): pickle.load can execute arbitrary code; only load this file
# if it comes from a trusted source.
with open("word2vecdic_" + str(DimentionN) + ".pkl", "r") as word2vec_pkl:
    word2vecdic = pickle.load(word2vec_pkl)
# Alternative: rebuild the dictionary instead of loading it from disk.
#word2vecdic = yahoo_data_preprocess_func.create_word2vec_dictionary(DimentionN)
voclist = word2vecdic.keys()
# Predict with BOW
print("predicting by BOW")
# Bag-of-words matrices for the minus, plus and neutral datasets.
yahooboarddataset_minus_5000_Bow_Vec = yahoo_data_preprocess_func.create_bow_vectorMat(
    yahooboarddataset_minus_5000, vocabIDdic)
yahooboarddataset_plus_5000_Bow_Vec = yahoo_data_preprocess_func.create_bow_vectorMat(
    yahooboarddataset_plus_5000, vocabIDdic)
yahooboarddataset_neutral_5000_Bow_Vec = yahoo_data_preprocess_func.create_bow_vectorMat(
    yahooboarddataset_neutral_5000, vocabIDdic)
# Two-class setup: minus rows first, then plus rows. The neutral matrix is
# only needed for the three-class variant (minus, neutral, plus with labels
# 0, 1, 2), which is currently not used.
data_Mat = np.concatenate(
    (yahooboarddataset_minus_5000_Bow_Vec, yahooboarddataset_plus_5000_Bow_Vec),
    axis=0)
# Labels: 5000 samples of class 0 followed by 5000 samples of class 1.
target = np.repeat([0, 1], 5000)
y_true_all, y_pred_all, clf_list = yahoo_data_preprocess_func.evaluate_data(
    data_Mat, target, clf=svm.LinearSVC())
print(classification_report(y_true_all, y_pred_all, digits=4))
print(accuracy_score(y_true_all, y_pred_all))


# Predict with the CDR method
print("predicting by CDR(" + str(DimentionN) + ")")
# Category vectors for the minus, neutral and plus datasets, built from the
# word2vec dictionary.
yahooboarddataset_minus_CDRVec_5000 = yahoo_data_preprocess_func.create_categoryvector(
    yahooboarddataset_minus_5000, word2vecdic, DimentionN)
yahooboarddataset_neutral_CDRVec_5000 = yahoo_data_preprocess_func.create_categoryvector(
    yahooboarddataset_neutral_5000, word2vecdic, DimentionN)
yahooboarddataset_plus_CDRVec_5000 = yahoo_data_preprocess_func.create_categoryvector(
    yahooboarddataset_plus_5000, word2vecdic, DimentionN)
# Two-class feature matrix: minus vectors first, then plus vectors (the
# neutral vectors are left out). `target` is reused from the BOW section.
CDRVec_Mat = np.concatenate(
    (np.array(yahooboarddataset_minus_CDRVec_5000.values()),
     np.array(yahooboarddataset_plus_CDRVec_5000.values())),
    axis=0)
y_true_all, y_pred_all, clf_list = yahoo_data_preprocess_func.evaluate_data(
    CDRVec_Mat, target, clf=svm.LinearSVC())
print(confusion_matrix(y_true_all, y_pred_all))
    BOW_vecdic[name] = dense
    bow_docs_all_zeros[name] = all(d == 0 for d in dense)


# Evaluate a LinearSVC on bag-of-words vectors once per key of newl_dic,
# using an equal number of -1 and +1 labelled samples, and keep every
# result tuple in k_bow_dic.
k_bow_dic = {}
for dic_key in newl_dic:
    newl = np.array(newl_dic[dic_key])
    target = np.array(toptarget_dic[dic_key])
    # Entries whose label is non-zero; not read again in this loop.
    newl_zero_one = newl[target != 0]
    # Size of the smaller of the two classes (-1 and +1).
    length = min(len(target[target == label]) for label in (-1, 1))
    # First `length` samples of class -1 followed by first `length` of +1.
    target_balanced = np.concatenate(
        (target[target == -1][:length], target[target == 1][:length]))
    # Relabel -1 as 0 so the labels become {0, 1}.
    target_balanced[target_balanced == -1] = 0
    newl_balanced = np.concatenate(
        (newl[target == -1][:length], newl[target == 1][:length]))
    # One BOW vector per selected entry, looked up in BOW_vecdic.
    BOW_vec_Mat = np.array([np.asarray(BOW_vecdic[key]) for key in newl_balanced])
    k_bow = yahoo_data_preprocess_func.evaluate_data(
        BOW_vec_Mat, target_balanced, clf=svm.LinearSVC())
    k_bow_dic[dic_key] = k_bow
    # evaluate_data's result unpacks into (true labels, predictions, classifiers).
    y_true_all, y_pred_all, clf_list = k_bow
    print(classification_report(y_true_all, y_pred_all, digits=4))
    print(accuracy_score(y_true_all, y_pred_all))

# Print the stored BOW results again, this time labelled with their key.
for dic_key, (y_true_all, y_pred_all, clf_list) in k_bow_dic.items():
    print(dic_key)
    print(classification_report(y_true_all, y_pred_all, digits=4))
    print(accuracy_score(y_true_all, y_pred_all))

# Predict with CDR
k_CDR_dic = {}
for dic_key in newl_dic:
    # dic_key = 'close_previousday_to_close_nextday'