use_indexes = np.array(list(set(range(10000)) - set(except_indexes)))
use_indexes_minus = use_indexes[use_indexes < 5000]
use_indexes_plus = use_indexes[use_indexes >= 5000]
length = min(len(use_indexes_minus), len(use_indexes_plus))
#length = 3700

# Balanced positive/negative sample: the first `length` indices of each class.
CategoryMat_zeroadded_plus_minus_sampled = defaultdict(int)
target_sample = np.array([0] * length + [1] * length).astype(np.int32)
#CategoryMat_zeroadded_plus_minus_neutral_sampled = defaultdict(int)
for index in NewpreW:
    CategoryMat_zeroadded_plus_minus_sampled[index] = NewtopdocveccategoryMat_zeroadded[index][
        list(use_indexes_minus[0:length]) + list(use_indexes_plus[0:length])]
    #CategoryMat_zeroadded_plus_minus_neutral_sampled[index] = NewtopdocveccategoryMat_zeroadded[index][
    #    list(use_indexes_minus[0:length]) + range(5000, 5000 + length) + list(use_indexes_plus[0:length])]

initialW = np.r_[-0.01 * np.random.random((1, len(NewpreW_zeroadded))),
                 0.01 * np.random.random((1, len(NewpreW_zeroadded)))]
k2 = yahoo_data_preprocess_func.caluculatemodel_gpu(
    yahoo_data_preprocess_func.IIalgorithm_simple_gpu,
    CategoryMat_zeroadded_plus_minus_sampled, NewpreW_zeroadded, target_sample,
    n_epoch=5, batchsize=100, using_gpu=False)

# Evaluate after removing the samples the binary classifier is likely getting
# wrong (5-fold cross-validation).
#skf = StratifiedKFold(target, n_folds=5, shuffle=False)
#y_all = target.astype(np.int32)
#k2_newlist = []
scale = 0.01
initialW = np.r_[-scale * np.random.random((1, len(NewpreW_zeroadded))),
                 scale * np.random.random((1, len(NewpreW_zeroadded)))]
#initialW = np.r_[np.random.random((2, len(NewpreW_zeroadded))) * scale * 2 - scale]
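# ---------------------------------------------------------------------------
# Minimal self-contained sketch (illustration only, not part of the pipeline
# above): it replays the balanced-sampling idea used for target_sample --
# keep an equal number of negative (< 5000) and positive (>= 5000) document
# indices -- on toy data. All toy_* names are invented for this example.
# ---------------------------------------------------------------------------
import numpy as np  # already imported earlier in this script; repeated so the sketch stands alone

toy_except = [3, 4998, 5001]                                   # indices to drop
toy_use = np.array(sorted(set(range(10000)) - set(toy_except)))
toy_minus = toy_use[toy_use < 5000]                            # negative class
toy_plus = toy_use[toy_use >= 5000]                            # positive class
toy_len = min(len(toy_minus), len(toy_plus))                   # balance the two classes
toy_target = np.array([0] * toy_len + [1] * toy_len, dtype=np.int32)
assert toy_target.shape[0] == 2 * toy_len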
def evaluate_siboru(initialW, n_epoch=10, using_gpu=True):
    skf = StratifiedKFold(target, n_folds=5, shuffle=False)
    y_all = target.astype(np.int32)
    k2_newlist = []

# Pad each class's matrices with zero columns to a common width
# (max_numwords_in_class). NOTE: the loop over class indices is assumed here,
# matching the per-class loops elsewhere in this script.
for index in NewpreW:
    NewpreW_zeroadded[index] = np.c_[
        NewpreW[index],
        np.zeros((1, max_numwords_in_class - NewpreW[index].shape[1]))]
    NewtopdocveccategoryMat_zeroadded[index] = np.c_[
        NewtopdocveccategoryMat[index],
        np.zeros((NewtopdocveccategoryMat[index].shape[0],
                  max_numwords_in_class - NewpreW[index].shape[1])).astype(np.float32)]

#x_all = np.concatenate(NewtopdocveccategoryMat_zeroadded.values()).reshape(
#    len(NewpreW_zeroadded), len(target), NewpreW_zeroadded[0].shape[1]).astype(np.float32)
#model = yahoo_data_preprocess_func.Classifier(
#    yahoo_data_preprocess_func.IIalgorithm_simple_gpu(NewpreW_zeroadded, 100, False))
#model.predictor.l_hidden.W.data
#model.predictor(Variable(x_all))

using_gpu = False
binary = True
using_gpu = True   # overrides the False above; set back to False to run on CPU
#binary = False
k2 = yahoo_data_preprocess_func.caluculatemodel_gpu(
    yahoo_data_preprocess_func.IIalgorithm_simple_gpu,
    NewtopdocveccategoryMat_zeroadded, NewpreW_zeroadded, target,
    n_epoch=10, batchsize=100, using_gpu=using_gpu, binary=binary)
#pickle.dump(k2, open("result_IIalgosimple_gpu_" + str(DimentionN) + ".dump", "w"))

# Build the polarity dictionary.
#k2 = pickle.load(open("result_IIalgosimple_1000.dump", "r"))
#k2 = pickle.load(open("result_IIalgosimple_2000.dump", "r"))
#k2 = pickle.load(open("result_IIalgosimple_5000.dump", "r"))
#vmats0, modellist, accuracylist, y_trueall, y_pridictall, sum_loss_all, sum_accuracy_all = k2
(vmats0, y_pred_II_algo_all), modellist, accuracylist, y_trueall, y_pridictall, sum_loss_all, sum_accuracy_all = k2
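# ---------------------------------------------------------------------------
# Minimal sketch (illustration only) of the zero-padding performed above:
# each per-class matrix is right-padded with zero columns so that every class
# ends up with the same width (max_numwords_in_class). Toy shapes only;
# the toy_* names are invented for this example.
# ---------------------------------------------------------------------------
toy_max_width = 5
toy_mat = np.ones((3, 2), dtype=np.float32)        # 3 documents, 2 word columns
toy_padded = np.c_[toy_mat,
                   np.zeros((toy_mat.shape[0], toy_max_width - toy_mat.shape[1]),
                            dtype=np.float32)]
assert toy_padded.shape == (3, toy_max_width)
assert toy_padded.dtype == np.float32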
"""
# Measure precision while varying the threshold.
#x_val = Variable(x_value.astype(np.float32))
#y_val = F.transpose(F.reshape(l_hidden(x_val), (DimentionN, datavolume * 2)))
#np.concatenate(preW.values())
#l_hidden.W.data.shape
#batch_matmul(l_hidden.W, x_val)
# y_val should end up with shape (100, 2000).
"""

# Earlier variant using IIalgorithm_simple_gpu_1, kept disabled here
# (assumed superseded by the calls below).
"""
k = yahoo_data_preprocess_func.caluculatemodel_gpu2(
    yahoo_data_preprocess_func.IIalgorithm_simple_gpu_1,
    topdocveccategoryMat3, preW, target4,
    n_epoch=10, batchsize=10, using_gpu=False, binary=True)
"""
k = yahoo_data_preprocess_func.caluculatemodel_gpu(
    yahoo_data_preprocess_func.IIalgorithm_simple_gpu,
    topdocveccategoryMat3, preW, target4,
    n_epoch=10, batchsize=100, using_gpu=False, binary=True)

inithialW_3_haba = 0.1
initialW = np.r_[-inithialW_3_haba * np.random.random((1, len(preW))),
                 inithialW_3_haba * np.random.random((1, len(preW)))]
k2 = yahoo_data_preprocess_func.caluculatemodel_gpu2(
    yahoo_data_preprocess_func.IIalgorithm_simple_gpu,
    topdocveccategoryMat3, preW, initialW, target4,
    n_epoch=10, batchsize=10, using_gpu=False, binary=True)

#k[1][0].predictor.l_hidden.W.data
#for model in k3[1]:
#for model in k[1]:
for model in k2[1]: