def evaluate_siboru(initialW, n_epoch = 10, using_gpu = True):
    """Cross-validate a model trained on a filtered, class-balanced training set.

    For each of the 5 outer stratified folds over the module-level ``target``:

    1. A first model is fitted on the training split through
       ``yahoo_data_preprocess_func.caluculatemodel_gpu2``.
    2. The predictions and true labels it returns are walked fold by fold
       using a 5-fold stratified split of ``y_train``; every training index
       whose predicted class differs from its true label is collected.
    3. The remaining training indices are split at index 5000 into a
       "minus" group (< 5000) and a "plus" group (>= 5000), both truncated
       to the size of the smaller group, and labelled 0 and 1.
    4. A second model is trained on that subset and evaluated on the outer
       test split through ``caluculatemodel_gpu2_train_test``.

    Args:
        initialW: Passed unchanged to both training helpers.
        n_epoch: Epoch count for the first (filtering) model only. The
            second model always trains for 50 epochs, as in the original.
        using_gpu: Forwarded to both training helpers; also decides whether
            predictions are copied to the host before comparison.

    Returns:
        A tuple ``(k2_newlist, all_except_indexes)``: the per-fold results of
        the second training call, and the per-fold lists of training indices
        that were excluded.
    """
    # NOTE(review): indices below this value are treated as the "minus"
    # class and the rest as "plus" - presumably the dataset holds 5000
    # negative samples followed by the positive ones; confirm with the
    # code that builds `target`.
    class_boundary = 5000

    outer_folds = StratifiedKFold(target, n_folds=5, shuffle=False)
    y_all = target.astype(np.int32)
    k2_newlist = []
    all_except_indexes = []

    for train, test in outer_folds:
        topdocveccategoryMat_train = {}
        topdocveccategoryMat_test = {}
        for labelnum in range(DimentionN):
            full_matrix = NewtopdocveccategoryMat_zeroadded[labelnum]
            topdocveccategoryMat_train[labelnum] = full_matrix[train]
            topdocveccategoryMat_test[labelnum] = full_matrix[test]
        y_train, y_test = y_all[train], y_all[test]

        k2_kyu = yahoo_data_preprocess_func.caluculatemodel_gpu2(
            yahoo_data_preprocess_func.IIalgorithm_simple_gpu,
            topdocveccategoryMat_train,
            NewpreW_zeroadded, initialW,
            y_train,
            n_epoch=n_epoch, batchsize=100,
            using_gpu=using_gpu, binary=True, print_True=False)
        # Only the predictions and the true labels are needed here; the
        # unpacking still checks that the helper returned the usual 7-tuple.
        (_, y_pred_II_algo_all), _, _, y_trueall, _, _, _ = k2_kyu

        # Assumes the helper concatenates its per-fold outputs in the same
        # order as this split - TODO confirm against caluculatemodel_gpu2.
        inner_folds = StratifiedKFold(y_train, n_folds=5, shuffle=False)
        except_indexes = []
        offset = 0
        for _, test_sub in inner_folds:
            test_indexes = train[test_sub]
            # A running offset keeps the slices aligned even when the inner
            # folds differ in size (i * len(fold) is only right when all
            # folds are equally long).
            stop = offset + len(test_indexes)
            test_target = y_trueall[offset:stop]
            pred = y_pred_II_algo_all[offset:stop].argmax(axis=1)
            if using_gpu:
                pred = chainer.cuda.to_cpu(pred)
            offset = stop

            misclassified = test_indexes[test_target != pred]
            # Accuracy of the filtering model on this inner fold.
            print(1 - len(misclassified) / float(len(test_indexes)))
            except_indexes += list(misclassified)
        all_except_indexes.append(except_indexes)

        # Keep only the training samples the first model classified correctly.
        use_indexes = np.array(list(set(train) - set(except_indexes)))
        use_indexes_minus = use_indexes[use_indexes < class_boundary]
        use_indexes_plus = use_indexes[use_indexes >= class_boundary]
        length = min(len(use_indexes_minus), len(use_indexes_plus))

        y_train_new = np.array([0] * length + [1] * length).astype(np.int32)
        # Built once instead of once per matrix.
        balanced_indexes = (list(use_indexes_minus[0:length])
                            + list(use_indexes_plus[0:length]))
        CategoryMat_zeroadded_plus_minus_sampled = defaultdict(int)
        for index in NewpreW:
            CategoryMat_zeroadded_plus_minus_sampled[index] = (
                NewtopdocveccategoryMat_zeroadded[index][balanced_indexes])

        k2_new = yahoo_data_preprocess_func.caluculatemodel_gpu2_train_test(
            yahoo_data_preprocess_func.IIalgorithm_simple_gpu,
            CategoryMat_zeroadded_plus_minus_sampled, topdocveccategoryMat_test,
            NewpreW_zeroadded, initialW,
            y_train_new, y_test,
            n_epoch=50, batchsize=100,
            # Honour the caller's device choice (this used to be hard-coded
            # to True, ignoring using_gpu=False).
            using_gpu=using_gpu, binary=True, print_True=False)
        k2_newlist.append(k2_new)

    return k2_newlist, all_except_indexes
def _print_precision_by_threshold(y_true, class_probs,
                                  thresh_values=(0.45, 0.5, 0.6), n_classes=3):
    """Print per-class precision and selection count for each threshold.

    For every threshold, the threshold itself is printed, followed by one
    ``"<precision> <count>"`` line per class, where ``count`` is the number
    of rows whose probability for that class exceeds the threshold and
    ``precision`` is the fraction of those rows whose true label is that
    class. As soon as a class selects no rows, the remaining classes of that
    threshold are skipped.
    """
    y_true = np.asarray(y_true)
    print("precision")
    for thresh_value in thresh_values:
        print(thresh_value)
        for class_id in range(n_classes):
            selected = class_probs.T[class_id] > thresh_value
            n_selected = int(np.count_nonzero(selected))
            if n_selected == 0:
                # Nothing selected: precision is undefined. Skip the rest
                # of this threshold instead of dividing by zero.
                break
            n_hits = int(np.count_nonzero(y_true[selected] == class_id))
            print("%s %s" % (n_hits / float(n_selected), n_selected))


def evaluate_use_unknown_data(initialW2,initialW3, thresh_value_base = 0.5, n_epoch1 = 10, n_epoch2 = 10,binary = True, using_gpu = False):
    """Cross-validate a model retrained with pseudo-labelled unknown data.

    For each of the 5 stratified folds over the module-level ``target``:

    1. A 3-class model is trained and evaluated on the fold through
       ``caluculatemodel_gpu2_train_test`` (``binary=False``), and per-class
       precision at thresholds 0.45 / 0.5 / 0.6 is printed.
    2. That model scores the first 5000 rows of
       ``NewtopdocveccategoryMat_zeroadded_unknown`` in batches of 100.
    3. Rows whose softmax output for column 0, 1 or 2 exceeds
       ``thresh_value_base`` are selected per class, and each class is
       truncated to the size of the smallest selection.
    4. A new model is trained on the original training split plus the
       selected rows. With ``binary`` set, label 1 is dropped from both
       train and test, label 2 is remapped to 1, and only the class-0 and
       class-2 selections are added.

    Args:
        initialW2: Passed to the retraining call when ``binary`` is true.
        initialW3: Passed to the first model, and to the retraining call
            when ``binary`` is false.
        thresh_value_base: Probability threshold for selecting unknown rows.
        n_epoch1: Epoch count for the first model.
        n_epoch2: Epoch count for the retrained model.
        binary: Whether the retrained model is 2-class (labels 0 and 2 only).
        using_gpu: Forwarded to both training calls.

    Returns:
        A tuple ``(k3_kyulist, k3_newlist, length_list)``: per-fold results
        of the first model, per-fold results of the retrained model, and the
        per-fold number of unknown rows added per class.
    """
    # NOTE(review): only the first 5000 unknown rows are scored; this must
    # stay a multiple of `batchsize` because the reshape below assumes full
    # batches.
    n_unknown = 5000
    batchsize = 100

    skf = StratifiedKFold(target, n_folds=5, shuffle=False)
    y_all = target.astype(np.int32)
    k3_kyulist = []
    k3_newlist = []
    length_list = []

    for train, test in skf:
        topdocveccategoryMat_train = {}
        topdocveccategoryMat_test = {}
        for labelnum in range(DimentionN):
            full_matrix = CategoryMat_zeroadded_plus_minus_neutral[labelnum]
            topdocveccategoryMat_train[labelnum] = full_matrix[train]
            topdocveccategoryMat_test[labelnum] = full_matrix[test]
        y_train, y_test = y_all[train], y_all[test]

        print("modeling:")
        k3_kyu = yahoo_data_preprocess_func.caluculatemodel_gpu2_train_test(
            yahoo_data_preprocess_func.IIalgorithm_simple_gpu,
            topdocveccategoryMat_train, topdocveccategoryMat_test,
            NewpreW_zeroadded, initialW3,
            y_train, y_test,
            n_epoch=n_epoch1, batchsize=100,
            using_gpu=using_gpu, binary=False)
        model = k3_kyu[1]
        _print_precision_by_threshold(y_test, k3_kyu[2])

        # Score the unknown data with the model that was just trained.
        # NOTE(review): the batches are plain NumPy arrays, so this
        # presumably only works when the model lives on the CPU - confirm
        # before calling with using_gpu=True.
        y_pred_II_algo = []
        for i in range(0, n_unknown, batchsize):
            x_batch_Mat = {}
            for index in NewtopdocveccategoryMat_zeroadded_unknown:
                x_batch_Mat[index] = (
                    NewtopdocveccategoryMat_zeroadded_unknown[index][i:i + batchsize])
            # list(...) keeps np.concatenate working when values() is a view.
            stacked = np.concatenate(list(x_batch_Mat.values()))
            x_batch = chainer.Variable(
                stacked.reshape(len(preW), batchsize,
                                NewpreW_zeroadded[0].shape[1]).astype(np.float32))
            y_pred_batch, _ = model.predictor(x_batch, train=False)
            y_pred_II_algo.append(F.softmax(y_pred_batch).data)
        y_pred_II_algo_batch = np.concatenate(y_pred_II_algo)

        # Extract with the model: keep a row for a class only when its
        # probability for that class clears the threshold.
        unknown_ids = np.arange(n_unknown)
        NewNegIDLists = unknown_ids[y_pred_II_algo_batch.T[0] > thresh_value_base]
        NewNeuIDLists = unknown_ids[y_pred_II_algo_batch.T[1] > thresh_value_base]
        NewPosIDLists = unknown_ids[y_pred_II_algo_batch.T[2] > thresh_value_base]
        # NOTE(review): the neutral count also caps `length` in binary mode,
        # even though neutral rows are not used there.
        length = min(len(NewPosIDLists), len(NewNeuIDLists), len(NewNegIDLists))
        neg_ids = NewNegIDLists[0:length]
        neu_ids = NewNeuIDLists[0:length]
        pos_ids = NewPosIDLists[0:length]

        # Retrain using the extracted data.
        topdocveccategoryMat_train_new = {}
        if binary:
            train_keep = y_train != 1
            test_keep = y_test != 1
            topdocveccategoryMat_test_new = {}
            for labelnum in range(DimentionN):
                unknown_matrix = NewtopdocveccategoryMat_zeroadded_unknown[labelnum]
                topdocveccategoryMat_train_new[labelnum] = np.r_[
                    topdocveccategoryMat_train[labelnum][train_keep],
                    unknown_matrix[pos_ids],
                    unknown_matrix[neg_ids]]
                topdocveccategoryMat_test_new[labelnum] = (
                    topdocveccategoryMat_test[labelnum][test_keep])

            # Explicit int32 keeps the labels integral even when length == 0
            # (np.array([]) would otherwise promote everything to float64).
            pseudo_labels = np.array([2] * length + [0] * length, dtype=np.int32)
            y_train_new = np.r_[y_train[train_keep], pseudo_labels]
            y_train_new[y_train_new == 2] = 1
            y_test_new = y_test[test_keep]
            y_test_new[y_test_new == 2] = 1

            print("modeling:")
            k3_new = yahoo_data_preprocess_func.caluculatemodel_gpu2_train_test(
                yahoo_data_preprocess_func.IIalgorithm_simple_gpu,
                topdocveccategoryMat_train_new, topdocveccategoryMat_test_new,
                NewpreW_zeroadded, initialW2,
                y_train_new, y_test_new,
                n_epoch=n_epoch2, batchsize=100,
                using_gpu=using_gpu, binary=True)
        else:
            for labelnum in range(DimentionN):
                unknown_matrix = NewtopdocveccategoryMat_zeroadded_unknown[labelnum]
                topdocveccategoryMat_train_new[labelnum] = np.r_[
                    topdocveccategoryMat_train[labelnum],
                    unknown_matrix[neg_ids],
                    unknown_matrix[neu_ids],
                    unknown_matrix[pos_ids]]

            pseudo_labels = np.array(
                [0] * length + [1] * length + [2] * length, dtype=np.int32)
            y_train_new = np.r_[y_train, pseudo_labels]

            print("modeling:")
            k3_new = yahoo_data_preprocess_func.caluculatemodel_gpu2_train_test(
                yahoo_data_preprocess_func.IIalgorithm_simple_gpu,
                topdocveccategoryMat_train_new, topdocveccategoryMat_test,
                NewpreW_zeroadded, initialW3,
                y_train_new, y_test,
                n_epoch=n_epoch2, batchsize=100,
                using_gpu=using_gpu, binary=False)

        k3_kyulist.append(k3_kyu)
        k3_newlist.append(k3_new)
        length_list.append(length)

    return k3_kyulist, k3_newlist, length_list