def train_conv(datasets,
               wordvec,
               word_size=150,
               window_sizes=[9, 11, 13],
               hidden_units=[100, 100, claz_count],
               dropout_rate=[0],
               shuffle_batch=True,
               n_epochs=10000,
               batch_size=256,
               lr_decay=0.95,
               sqr_norm_lim=9,
               conv_non_linear="relu",
               activations=[Tanh],
               non_static=True,
               proportion=1):
    rng = numpy.random.RandomState(3435)
    sen_length = len(datasets[0][0]) - 1        # sentence length (column 0 holds the label)
    filter_w = word_size                        # filter width = embedding dimension
    feature_maps = hidden_units[0]
    filter_shapes = []                          # convolution filter shapes (the W parameters)
    pool_sizes = []
    for filter_h in window_sizes:               # filter heights = context window sizes
        filter_shapes.append((feature_maps, 1, filter_h, filter_w))
        pool_sizes.append((sen_length - filter_h + 1, 1))
    parameters = [("image shape", sen_length, word_size), ("filter shape", filter_shapes),
                  ("hidden_units", hidden_units), ("dropout", dropout_rate),
                  ("batch_size", batch_size), ("non_static", non_static),
                  ("learn_decay", lr_decay), ("conv_non_linear", conv_non_linear),
                  ("sqr_norm_lim", sqr_norm_lim), ("shuffle_batch", shuffle_batch)]
    print parameters

    # shared word-embedding lookup table; row 0 is reserved for padding
    Words = theano.shared(value=wordvec, name='Words')
    zero_vec_tensor = T.vector()
    zero_vec = numpy.zeros(word_size, dtype=theano.config.floatX)
    set_zero = theano.function([zero_vec_tensor],
                               updates=[(Words, T.set_subtensor(Words[0, :], zero_vec_tensor))])

    x = T.matrix('x')
    y = T.ivector('y')
    index = T.lscalar('index')
    # look up embeddings: (batch_size, 1, sen_length, word_size)
    layer0_input = Words[T.cast(x.flatten(), dtype="int32")].reshape(
        (x.shape[0], 1, x.shape[1], Words.shape[1]))

    # one conv-pool layer per window size, all reading the same input
    conv_layers = []
    layer1_inputs = []
    for i in xrange(len(window_sizes)):
        filter_shape = filter_shapes[i]
        pool_size = pool_sizes[i]
        conv_layer = LeNetConvPoolLayer(rng, input=layer0_input,
                                        image_shape=(batch_size, 1, sen_length, word_size),
                                        filter_shape=filter_shape,
                                        poolsize=pool_size,
                                        non_linear=conv_non_linear)
        layer1_input = conv_layer.output.flatten(2)
        conv_layers.append(conv_layer)
        layer1_inputs.append(layer1_input)
    layer1_input = T.concatenate(layer1_inputs, 1)
    hidden_units[0] = feature_maps * len(window_sizes)

    classifier = MLPDropout(rng, input=layer1_input, layer_sizes=hidden_units,
                            activations=activations, dropout_rates=dropout_rate)

    params = classifier.params
    for conv_layer in conv_layers:
        params += conv_layer.params
    if non_static:
        # fine-tune the word embeddings as well
        params += [Words]
    cost = classifier.negative_log_likelihood(y)
    dropout_cost = classifier.dropout_negative_log_likelihood(y)
    grad_updates = sgd_updates_adadelta(params, dropout_cost, lr_decay, 1e-6, sqr_norm_lim)

    numpy.random.seed(3435)
    test_set = datasets[1]
    train_set = datasets[0]
    if train_set.shape[0] % batch_size > 0:
        extra_data_num = batch_size - train_set.shape[0] % batch_size
        train_set = numpy.random.permutation(train_set)         # shuffle
        extra_data = train_set[:extra_data_num]
        # pad so the training set size is an exact multiple of batch_size
        new_data = numpy.append(train_set, extra_data, axis=0)
    else:
        new_data = train_set
    train_set_x = new_data[:, 1:]
    test_set_x = test_set[:, 1:]
    train_set_x, train_set_y = shared_dataset((train_set_x, new_data[:, 0]))
    test_set_x, test_set_y = shared_dataset((test_set_x, test_set[:, 0]))
    n_batches = new_data.shape[0] / batch_size                  # number of minibatches
    n_train_batches = int(numpy.round(n_batches))

    train_model = theano.function([index], cost, updates=grad_updates,
                                  givens={
                                      x: train_set_x[index * batch_size:(index + 1) * batch_size],
                                      y: train_set_y[index * batch_size:(index + 1) * batch_size]})
    # test-time graph: same network, but built for the full test-set size
    test_pred_layers = []
    test_size = test_set_x.shape[0].eval()
    test_layer0_input = Words[T.cast(x.flatten(), dtype="int32")].reshape(
        (x.shape[0], 1, x.shape[1], Words.shape[1]))
    for conv_layer in conv_layers:
        test_layer0_output = conv_layer.predict(test_layer0_input, test_size)
        test_pred_layers.append(test_layer0_output.flatten(2))
    test_layer1_input = T.concatenate(test_pred_layers, 1)
    test_y_pred = classifier.predict(test_layer1_input)
    test_error = T.mean(T.neq(test_y_pred, y))
    test_model_all = theano.function(inputs=[x, y], outputs=[test_error, test_y_pred])

    epoch = 0
    max_f1_score = 0.25
    while epoch < n_epochs:
        epoch += 1
        if shuffle_batch:
            cost = []
            for minibatch_index in numpy.random.permutation(range(n_train_batches)):
                cost_epoch = train_model(minibatch_index)
                cost.append(cost_epoch)
                set_zero(zero_vec)              # keep the padding embedding at zero
            error, prediction = test_model_all(x=test_set_x.get_value(borrow=True),
                                               y=test_set_y.eval())
            precision, recall, f1_score = evaluate.evaluate_multi_class_seedev(
                prediction=prediction, answer=test_set_y.eval(), claz_count=claz_count)
            print 'epoch:%d, error:%.3f, precision:%.4f, recall:%.4f, f1_score:%.4f' % \
                (epoch, error, precision, recall, f1_score)
            if f1_score > max_f1_score:
                max_f1_score = f1_score
                write_matrix_to_file(prediction, 'pred_entity.txt')
        else:
            for minibatch_index in xrange(n_train_batches):
                cost_epoch = train_model(minibatch_index)
                set_zero(zero_vec)
            # evaluate here so that `error` is defined for the report below
            error, prediction = test_model_all(x=test_set_x.get_value(borrow=True),
                                               y=test_set_y.eval())
            print 'epoch:%d, error:%f' % (epoch, error)
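# --- Usage sketch for train_conv (an illustration, not part of the original script) ---
# train_conv expects each dataset row as [label, word_idx_1, ..., word_idx_n], with
# embedding row 0 reserved for padding, and `wordvec` as a float matrix of word vectors
# (dtype theano.config.floatX). `load_datasets` below is a hypothetical helper, and
# claz_count must already be defined at module level.
#
#     train_matrix, test_matrix, wordvec = load_datasets()   # hypothetical helper
#     wordvec = numpy.asarray(wordvec, dtype=theano.config.floatX)
#     train_conv([train_matrix, test_matrix], wordvec,
#                word_size=wordvec.shape[1],
#                window_sizes=[3, 5, 7],
#                hidden_units=[100, 100, claz_count],
#                dropout_rate=[0.5],
#                batch_size=64,
#                n_epochs=25)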
def train_nn(datasets,
             wordvec,
             word_size=200,
             hidden_units=[2000, 1000, 2],
             dropout_rate=[0, 0, 0],
             shuffle_batch=True,
             n_epochs=3000,
             batch_size=256,
             init_learning_rate=0.4,
             adadelta=True,
             lr_decay=0.95,
             sqr_norm_lim=9,
             activations=[Tanh, Tanh, Tanh],
             non_static=True,
             use_valid_set=False,
             proportion=1):
    rng = numpy.random.RandomState(3435)
    # shared word-embedding lookup table; row 0 is reserved for padding
    Words = theano.shared(value=wordvec, name='Words')
    zero_vec_tensor = T.vector()
    zero_vec = numpy.zeros(word_size, dtype=theano.config.floatX)
    set_zero = theano.function([zero_vec_tensor],
                               updates=[(Words, T.set_subtensor(Words[0, :], zero_vec_tensor))])

    x = T.matrix('x')
    y = T.ivector('y')
    index = T.lscalar('index')
    # concatenate the embeddings of every word in a sentence into one flat vector
    layer0_input = Words[T.cast(x.flatten(), dtype="int32")].reshape(
        (x.shape[0], x.shape[1] * Words.shape[1]))

    classifier = MLPDropout(rng, input=layer0_input, layer_sizes=hidden_units,
                            activations=activations, dropout_rates=dropout_rate)

    params = classifier.params
    if non_static:
        # fine-tune the word embeddings as well
        params.append(Words)
    cost = classifier.negative_log_likelihood(y)
    dropout_cost = classifier.dropout_negative_log_likelihood(y)
    if adadelta:
        grad_updates = sgd_updates_adadelta(params, dropout_cost, lr_decay, 1e-6, sqr_norm_lim)
    else:
        grad_updates = sgd_updates(params, dropout_cost, init_learning_rate)

    numpy.random.seed(3435)
    if datasets[0].shape[0] % batch_size > 0:
        extra_data_num = batch_size - datasets[0].shape[0] % batch_size
        train_set = numpy.random.permutation(datasets[0])       # shuffle
        extra_data = train_set[:extra_data_num]
        # pad so the training set size is an exact multiple of batch_size
        new_data = numpy.append(datasets[0], extra_data, axis=0)
    else:
        new_data = datasets[0]
    new_data = numpy.random.permutation(new_data)                # shuffled training data
    n_batches = new_data.shape[0] / batch_size                   # number of minibatches
    n_train_batches = int(numpy.round(n_batches * proportion))

    if len(datasets) == 3:
        # explicit train / validation / test split
        use_valid_set = True
        train_set = new_data
        val_set = datasets[1]
        train_set_x, train_set_y = shared_dataset((train_set[:, 1:], train_set[:, 0]))
        val_set_x, val_set_y = shared_dataset((val_set[:, 1:], val_set[:, 0]))
        test_set_x = datasets[2][:, 1:]
        test_set_y = numpy.asarray(datasets[2][:, 0], "int32")
    else:
        test_set_x = datasets[1][:, 1:]
        test_set_y = numpy.asarray(datasets[1][:, 0], "int32")
        if use_valid_set:
            # hold out the tail of the shuffled training data for validation
            train_set = new_data[:n_train_batches * batch_size, :]
            val_set = new_data[n_train_batches * batch_size:, :]
            train_set_x, train_set_y = shared_dataset((train_set[:, 1:], train_set[:, 0]))
            val_set_x, val_set_y = shared_dataset((val_set[:, 1:], val_set[:, 0]))
        else:
            train_set = new_data[:, :]
            train_set_x, train_set_y = shared_dataset((train_set[:, 1:], train_set[:, 0]))
    n_batches = new_data.shape[0] / batch_size                   # recompute over the padded set
    n_train_batches = int(numpy.round(n_batches))

    train_model = theano.function([index], cost, updates=grad_updates,
                                  givens={
                                      x: train_set_x[index * batch_size:(index + 1) * batch_size],
                                      y: train_set_y[index * batch_size:(index + 1) * batch_size]})

    # monitor the F-score of class 1 on the test set
    f_scores = (classifier.f_score(y, i + 1)[0] for i in xrange(1))
    f_scores = tuple(f_scores)
    test_model = theano.function([], f_scores,
                                 givens={
                                     x: test_set_x,
                                     y: test_set_y})
    epoch = 0
    while epoch < n_epochs:
        epoch += 1
        if shuffle_batch:
            cost = []
            for minibatch_index in numpy.random.permutation(range(n_train_batches)):
                cost_epoch = train_model(minibatch_index)
                cost.append(cost_epoch)
                set_zero(zero_vec)              # keep the padding embedding at zero
            print 'epoch:%d, cost value:%f, ' % (epoch, numpy.mean(cost)),
        else:
            for minibatch_index in xrange(n_train_batches):
                cost_epoch = train_model(minibatch_index)
                print cost_epoch
                set_zero(zero_vec)
        f_scores = tuple(test_model())
        print '%.2f,' % f_scores

    # final pass: predict on the test set and dump predictions and gold labels
    layer0_input = Words[T.cast(test_set_x.flatten(), dtype="int32")].reshape(
        (test_set_x.shape[0], test_set_x.shape[1] * Words.shape[1]))
    t_pred = classifier.predict(layer0_input)
    write_matrix_to_file(t_pred.eval(), 'pred.txt')
    write_matrix_to_file(test_set_y, 'real.txt')
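# --- Demo driver (an assumption, not part of the original module): builds a tiny
# synthetic binary-classification dataset in the layout train_nn expects -- each row
# is [label, word_idx_1, ..., word_idx_sen_len] -- plus a random embedding table,
# then trains for a few epochs. Note that hidden_units[0] must equal
# sen_len * word_size, because the MLP input is the concatenated word vectors.
if __name__ == '__main__':
    vocab_size, word_dim, sen_len = 500, 200, 10
    demo_rng = numpy.random.RandomState(0)
    demo_wordvec = numpy.asarray(demo_rng.uniform(-0.25, 0.25, (vocab_size, word_dim)),
                                 dtype=theano.config.floatX)
    demo_wordvec[0, :] = 0.0                    # row 0 is the padding vector

    def make_split(n_rows):
        labels = demo_rng.randint(0, 2, (n_rows, 1))
        word_ids = demo_rng.randint(1, vocab_size, (n_rows, sen_len))
        return numpy.hstack([labels, word_ids]).astype(theano.config.floatX)

    train_nn([make_split(512), make_split(128)], demo_wordvec,
             word_size=word_dim,
             hidden_units=[sen_len * word_dim, 1000, 2],
             batch_size=64,
             n_epochs=3)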