# Build a class-balanced subsample of row indexes.
# Rows listed in except_indexes are removed from 0..9999; the survivors are
# split at 5000 into a "minus" group (< 5000) and a "plus" group (>= 5000).
# sorted() pins the order: set iteration order is unspecified, and the
# [0:length] truncation below would otherwise keep an arbitrary subset of rows.
use_indexes = np.array(sorted(set(range(10000)) - set(except_indexes)))
use_indexes_minus = use_indexes[use_indexes < 5000]
use_indexes_plus = use_indexes[use_indexes >= 5000]
# Keep the same number of rows from each group (the size of the smaller one).
length = min(len(use_indexes_minus), len(use_indexes_plus))
#length = 3700
CategoryMat_zeroadded_plus_minus_sampled = defaultdict(int)
# Labels line up with the sampled rows: `length` zeros (minus group) followed by `length` ones (plus group).
target_sample = np.array([0]* length + [1] * length).astype(np.int32)
#CategoryMat_zeroadded_plus_minus_neutral_sampled = defaultdict(int)
# The row selection is the same for every key, so build it once outside the loop.
sampled_rows = list(use_indexes_minus[0:length]) + list(use_indexes_plus[0:length])
for index in NewpreW:
	CategoryMat_zeroadded_plus_minus_sampled[index] = NewtopdocveccategoryMat_zeroadded[index][sampled_rows]
	#CategoryMat_zeroadded_plus_minus_neutral_sampled[index] = NewtopdocveccategoryMat_zeroadded[index][list(use_indexes_minus[0:length]) + range(5000, 5000 + length) +list(use_indexes_plus[0:length])]

# Random initial weights, shape (2, len(NewpreW_zeroadded)):
# row 0 holds values in (-0.01, 0], row 1 holds values in [0, 0.01).
# Note that initialW is not passed to the call below.
initialW = np.concatenate(
	(
		-0.01 * np.random.random((1, len(NewpreW_zeroadded))),
		0.01 * np.random.random((1, len(NewpreW_zeroadded))),
	),
	axis=0,
)

# Run caluculatemodel_gpu on the balanced subsample: 5 epochs, batches of 100, using_gpu off.
k2 = yahoo_data_preprocess_func.caluculatemodel_gpu(
	yahoo_data_preprocess_func.IIalgorithm_simple_gpu,
	CategoryMat_zeroadded_plus_minus_sampled,
	NewpreW_zeroadded,
	target_sample,
	n_epoch=5,
	batchsize=100,
	using_gpu=False,
)


# Evaluate with 5-fold cross-validation, excluding the samples that look
# mislabeled in the binary classification.

#skf = StratifiedKFold(target, n_folds=5,shuffle = False)
#y_all = target.astype(np.int32)
#k2_newlist = []
# Magnitude bound for the random initial weights.
scale = 0.01
# Shape (2, len(NewpreW_zeroadded)): row 0 lies in (-scale, 0], row 1 in [0, scale).
initialW = np.concatenate(
	(
		-scale * np.random.random((1, len(NewpreW_zeroadded))),
		scale * np.random.random((1, len(NewpreW_zeroadded))),
	),
	axis=0,
)
#initialW = np.r_[np.random.random((2,len(NewpreW_zeroadded))) * scale * 2 - scale]
def evaluate_siboru(initialW, n_epoch = 10, using_gpu = True):
	skf = StratifiedKFold(target, n_folds=5,shuffle = False)
	y_all = target.astype(np.int32)
	k2_newlist = []
예제 #2
0
	# NOTE(review): the statement that opens this indented block is not visible in this excerpt.
	# Right-pad NewpreW[index] with zero columns so the result is max_numwords_in_class columns wide
	# (the single-row zeros block implies NewpreW[index] has exactly one row).
	NewpreW_zeroadded[index] = np.c_[NewpreW[index], np.zeros((1,max_numwords_in_class - NewpreW[index].shape[1]))]
	# Append the same number of float32 zero columns to the matching matrix, one zero row per existing row.
	# NOTE(review): the pad width comes from NewpreW[index], so this assumes both arrays have the same column count -- confirm.
	NewtopdocveccategoryMat_zeroadded[index] = np.c_[NewtopdocveccategoryMat[index], 
		np.zeros((NewtopdocveccategoryMat[index].shape[0],max_numwords_in_class - NewpreW[index].shape[1])).astype(np.float32)]

#x_all = np.concatenate(NewtopdocveccategoryMat_zeroadded.values()).reshape(len(NewpreW_zeroadded),len(target),NewpreW_zeroadded[0].shape[1]).astype(np.float32)

#model = yahoo_data_preprocess_func.Classifier(yahoo_data_preprocess_func.IIalgorithm_simple_gpu(NewpreW_zeroadded, 100, False))
#model.predictor.l_hidden.W.data
#model.predictor(Variable(x_all))

# Run settings for the call below; the commented lines are the alternative toggles.
#using_gpu = False
using_gpu = True
binary = True
#binary = False

# Run caluculatemodel_gpu on the *_zeroadded inputs: 10 epochs, batches of 100.
k2 = yahoo_data_preprocess_func.caluculatemodel_gpu(
	yahoo_data_preprocess_func.IIalgorithm_simple_gpu,
	NewtopdocveccategoryMat_zeroadded,
	NewpreW_zeroadded,
	target,
	n_epoch=10,
	batchsize=100,
	using_gpu=using_gpu,
	binary=binary,
)

#pickle.dump(k2,open("result_IIalgosimple_gpu_" + str(DimentionN) + ".dump","w"))

# Build the polarity dictionary.
#k2 = pickle.load(open("result_IIalgosimple_1000.dump","r"))
#k2 = pickle.load(open("result_IIalgosimple_2000.dump","r"))
#k2 = pickle.load(open("result_IIalgosimple_5000.dump","r"))


#vmats0, modellist, accuracylist, y_trueall, y_pridictall, sum_loss_all, sum_accuracy_all = k2
# k2 is unpacked as a 7-item result whose first item is itself a pair.
(
	(vmats0, y_pred_II_algo_all),
	modellist,
	accuracylist,
	y_trueall,
	y_pridictall,
	sum_loss_all,
	sum_accuracy_all,
) = k2

"""
#閾値を変えた時のPrecisionを測定
예제 #3
0
#x_val = Variable(x_value.astype(np.float32))
#y_val = F.transpose(F.reshape(l_hidden(x_val), (DimentionN, datavolume*2)))

#np.concatenate(preW.values())
#l_hidden.W.data.shape
#batch_matmul(l_hidden.W,x_val )
#y_valは100,2000にしたい
"""
k = yahoo_data_preprocess_func.caluculatemodel_gpu2(
	yahoo_data_preprocess_func.IIalgorithm_simple_gpu_1, 
	topdocveccategoryMat3,preW,target4,
	n_epoch = 10,batchsize = 10,using_gpu = False,binary = True)
"""

# First run: caluculatemodel_gpu with 10 epochs, batches of 100, using_gpu off, binary mode on.
k = yahoo_data_preprocess_func.caluculatemodel_gpu(
	yahoo_data_preprocess_func.IIalgorithm_simple_gpu,
	topdocveccategoryMat3,
	preW,
	target4,
	n_epoch=10,
	batchsize=100,
	using_gpu=False,
	binary=True,
)

# Magnitude bound for the random initial weights used by the second run.
inithialW_3_haba = 0.1
# Shape (2, len(preW)): row 0 lies in (-0.1, 0], row 1 in [0, 0.1).
initialW = np.concatenate(
	(
		-inithialW_3_haba * np.random.random((1, len(preW))),
		inithialW_3_haba * np.random.random((1, len(preW))),
	),
	axis=0,
)

# Second run: caluculatemodel_gpu2 takes initialW as an extra positional argument
# (between preW and target4) and is run here with batches of 10.
k2 = yahoo_data_preprocess_func.caluculatemodel_gpu2(
	yahoo_data_preprocess_func.IIalgorithm_simple_gpu,
	topdocveccategoryMat3,
	preW,
	initialW,
	target4,
	n_epoch=10,
	batchsize=10,
	using_gpu=False,
	binary=True,
)

#k[1][0].predictor.l_hidden.W.data
#for model in k3[1]:
#for model in k[1]:
for model in k2[1]: