#mod_number = 5, mod_value = 1, date = '09302015', with_date = True) #all_target_test_Bow_list_dic, all_target_pred_Bow_list_dic = evaluate_with_SVM_3_k_fold(BOW_vecdic,toptarget_dic, newl_dic) k_bow_dic = {} for dic_key in newl_dic: #dic_key = 'close_previousday_to_close_nextday' newl = np.array(newl_dic[dic_key]) target = np.array(toptarget_dic[dic_key]) newl_zero_one = newl[target != 0] length = min(len(target[target== -1]), len(target[target== 1])) target_balanced = np.r_[target[target== -1][0:length], target[target== 1][0:length]] target_balanced[target_balanced == -1] = 0 newl_balanced = np.r_[newl[target== -1][0:length], newl[target== 1][0:length]] BOW_vec_Mat = np.array([np.array(BOW_vecdic[key]) for key in newl_balanced]) k_bow = yahoo_data_preprocess_func.evaluate_data(BOW_vec_Mat,target_balanced, clf = svm.LinearSVC()) y_true_all, y_pred_all,clf_list = k_bow print classification_report(y_true_all, y_pred_all, digits = 4) print accuracy_score(y_true_all, y_pred_all) k_bow_dic[dic_key] = k_bow for dic_key in k_bow_dic: print dic_key y_true_all, y_pred_all,clf_list = k_bow_dic[dic_key] print classification_report(y_true_all, y_pred_all, digits = 4) print accuracy_score(y_true_all, y_pred_all) #CDRで予測 k_CDR_dic = {} for dic_key in newl_dic: #dic_key = 'close_previousday_to_close_nextday'
lambda_ = 0.1 L_mat = np.diag(W.sum(axis = 0)) - W F_func = np.linalg.inv(lambda_ * L_mat + np.eye(len(L_mat))) f = F_func.dot(y) #for index, name in enumerate(limitNammelist): #try: #print pne2[name],name, y[index], f[index] #except: #continue BowMat_limit = NewtopdocveccategoryMat_BOW.T[NewtopdocveccategoryMat_BOW.sum(axis = 0) > 10].T f_Mat = np.dot(np.ones((10000,1)), f.T) #BowMat_gragh_polarity = BowMat_limit * f BowMat_gragh_polarity = BowMat_limit * f_Mat target = np.array([0] * 5000 + [1] * 5000) y_true_all, y_pred_all, clf_list = yahoo_data_preprocess_func.evaluate_data(BowMat_gragh_polarity,target, clf = svm.LinearSVC()) print confusion_matrix(y_true_all, y_pred_all) print classification_report(y_true_all, y_pred_all, digits = 4) print accuracy_score(y_true_all, y_pred_all) y_true_all, y_pred_all, clf_list = yahoo_data_preprocess_func.evaluate_data(BowMat_limit,target, clf = svm.LinearSVC()) print confusion_matrix(y_true_all, y_pred_all) print classification_report(y_true_all, y_pred_all, digits = 4) print accuracy_score(y_true_all, y_pred_all) val_y = chainer.Variable(y.astype(np.float32).T) class graph_polarity_metohd(Chain): def __init__(self): super(graph_polarity_metohd, self).__init__(
DimentionN = 1000
# Load word2vecdic from its pickle file. The with-block closes the file handle
# as soon as loading finishes; the bare open() used before never closed it.
# NOTE(review): mode "r" is kept unchanged; pickle files are normally opened
# in binary mode ("rb") — confirm how the file was written before changing it.
with open("word2vecdic_" + str(DimentionN) + ".pkl", "r") as _word2vec_file:
    word2vecdic = pickle.load(_word2vec_file)
# word2vecdic = yahoo_data_preprocess_func.create_word2vec_dictionary(DimentionN)
voclist = word2vecdic.keys()

# Predict with BOW
print("predicting by BOW")
yahooboarddataset_minus_5000_Bow_Vec = yahoo_data_preprocess_func.create_bow_vectorMat(
    yahooboarddataset_minus_5000, vocabIDdic)
yahooboarddataset_plus_5000_Bow_Vec = yahoo_data_preprocess_func.create_bow_vectorMat(
    yahooboarddataset_plus_5000, vocabIDdic)
yahooboarddataset_neutral_5000_Bow_Vec = yahoo_data_preprocess_func.create_bow_vectorMat(
    yahooboarddataset_neutral_5000, vocabIDdic)
# training_data_Mat = np.r_[yahooboarddataset_minus_5000_Bow_Vec[0:4000], yahooboarddataset_plus_5000_Bow_Vec[0:4000]]
# testdata_Mat = np.r_[yahooboarddataset_plus_5000_Bow_Vec[4000:], yahooboarddataset_minus_5000_Bow_Vec[4000:]]
# Two-class setup: minus rows first (label 0), then plus rows (label 1).
data_Mat = np.r_[yahooboarddataset_minus_5000_Bow_Vec, yahooboarddataset_plus_5000_Bow_Vec]
# data_Mat = np.r_[yahooboarddataset_minus_5000_Bow_Vec, yahooboarddataset_neutral_5000_Bow_Vec, yahooboarddataset_plus_5000_Bow_Vec]
target = np.array([0] * 5000 + [1] * 5000)
# target = np.array([0] * 5000 + [1] * 5000 + [2] * 5000)
y_true_all, y_pred_all, clf_list = yahoo_data_preprocess_func.evaluate_data(
    data_Mat, target, clf=svm.LinearSVC())
print(classification_report(y_true_all, y_pred_all, digits=4))
print(accuracy_score(y_true_all, y_pred_all))

# Predict with the CDR method
print("predicting by CDR(" + str(DimentionN) + ")")
yahooboarddataset_minus_CDRVec_5000 = yahoo_data_preprocess_func.create_categoryvector(
    yahooboarddataset_minus_5000, word2vecdic, DimentionN)
yahooboarddataset_neutral_CDRVec_5000 = yahoo_data_preprocess_func.create_categoryvector(
    yahooboarddataset_neutral_5000, word2vecdic, DimentionN)
yahooboarddataset_plus_CDRVec_5000 = yahoo_data_preprocess_func.create_categoryvector(
    yahooboarddataset_plus_5000, word2vecdic, DimentionN)
# Same ordering as the BOW matrix, so `target` from above is reused as-is.
CDRVec_Mat = np.r_[
    np.array(yahooboarddataset_minus_CDRVec_5000.values()),
    # np.array(yahooboarddataset_neutral_CDRVec_5000.values()),
    np.array(yahooboarddataset_plus_CDRVec_5000.values())
]
# target = np.array([0] * 5000 + [1] * 5000)
y_true_all, y_pred_all, clf_list = yahoo_data_preprocess_func.evaluate_data(
    CDRVec_Mat, target, clf=svm.LinearSVC())
print(confusion_matrix(y_true_all, y_pred_all))
BOW_vecdic[name] = dense bow_docs_all_zeros[name] = all(d == 0 for d in dense) k_bow_dic = {} for dic_key in newl_dic: # dic_key = 'close_previousday_to_close_nextday' newl = np.array(newl_dic[dic_key]) target = np.array(toptarget_dic[dic_key]) newl_zero_one = newl[target != 0] length = min(len(target[target == -1]), len(target[target == 1])) target_balanced = np.r_[target[target == -1][0:length], target[target == 1][0:length]] target_balanced[target_balanced == -1] = 0 newl_balanced = np.r_[newl[target == -1][0:length], newl[target == 1][0:length]] BOW_vec_Mat = np.array([np.array(BOW_vecdic[key]) for key in newl_balanced]) k_bow = yahoo_data_preprocess_func.evaluate_data(BOW_vec_Mat, target_balanced, clf=svm.LinearSVC()) y_true_all, y_pred_all, clf_list = k_bow print classification_report(y_true_all, y_pred_all, digits=4) print accuracy_score(y_true_all, y_pred_all) k_bow_dic[dic_key] = k_bow for dic_key in k_bow_dic: print dic_key y_true_all, y_pred_all, clf_list = k_bow_dic[dic_key] print classification_report(y_true_all, y_pred_all, digits=4) print accuracy_score(y_true_all, y_pred_all) # CDRで予測 k_CDR_dic = {} for dic_key in newl_dic: # dic_key = 'close_previousday_to_close_nextday'