示例#1
0
 # NOTE(review): this is an excerpt from the body of a larger function; rng,
 # Words, x, y, datasets and the hyper-parameters used below are defined
 # outside this fragment.
 # Symbolic scalar named 'index' (presumably the minibatch index -- confirm
 # against the full function).
 index=T.lscalar('index')
 
 # Use every entry of x as a row index into Words, then reshape so that each
 # row of x becomes a single vector of x.shape[1]*Words.shape[1] values.
 layer0_input= Words[T.cast(x.flatten(),dtype="int32")].\
               reshape((x.shape[0],x.shape[1]*Words.shape[1]))
 #input_printed=theano.printing.Print('layer0_input:')(layer0_input)
 classifier = MLPDropout(rng, 
                         input=layer0_input, 
                         layer_sizes=hidden_units,
                         activations=activations, 
                         dropout_rates=dropout_rate)
 # params is the same list object as classifier.params, so the append below
 # also changes classifier.params.
 params = classifier.params
 
 # When non_static is true, Words is added to the parameters handed to the
 # update rule.
 if non_static:
     params.append(Words)
     
 # Two cost expressions are built; only dropout_cost is passed to the update
 # rules below.
 cost = classifier.negative_log_likelihood(y) 
 dropout_cost = classifier.dropout_negative_log_likelihood(y)  
 # Choose the update rule: sgd_updates_adadelta or sgd_updates.
 if adadelta:
     grad_updates = sgd_updates_adadelta(params, dropout_cost, 
                                         lr_decay, 1e-6, sqr_norm_lim)
 else:
     grad_updates = sgd_updates(params,dropout_cost,init_learning_rate)
 #print params
 # Fixed seed for the global numpy RNG used by the permutation below.
 numpy.random.seed(3435)
 # If the number of training rows is not a multiple of batch_size, pad it
 # with rows taken from a shuffled copy of the training data.
 if datasets[0].shape[0] % batch_size > 0:
     extra_data_num = batch_size - datasets[0].shape[0] % batch_size
     train_set = numpy.random.permutation(datasets[0]) # shuffled copy of the training rows
     extra_data = train_set[:extra_data_num] # rows used to fill up the last batch
     new_data=numpy.append(datasets[0],extra_data,axis=0) # make the number of training rows an exact multiple of batch_size
 else:
     new_data = datasets[0]
示例#2
0
def train_conv(datasets,
               wordvec,
               word_size=150,
               window_sizes=[9,11,13],
               hidden_units=[100,100,claz_count],
               dropout_rate=[0],
               shuffle_batch=True,
               n_epochs=10000,
               batch_size=256,
               lr_decay=0.95,
               sqr_norm_lim=9,
               conv_non_linear="relu",
               activations=[Tanh],#dropout
               non_static=True,
               proportion=1):
    """Train a multi-window convolutional classifier and evaluate it each epoch.

    One LeNetConvPoolLayer is built per entry of ``window_sizes``; their
    flattened outputs are concatenated and fed to an MLPDropout classifier.
    Training uses ``sgd_updates_adadelta`` on the dropout cost.

    Args:
        datasets: ``datasets[0]`` is the training matrix and ``datasets[1]``
            the test matrix. Column 0 of each row is used as the label and
            the remaining columns as row indices into the embedding matrix.
        wordvec: initial value of the shared embedding matrix ``Words``.
            Row 0 is reset to zeros after every update (presumably the
            padding word -- confirm against the data preparation code).
        word_size: embedding width; also used as the filter width.
        window_sizes: filter heights, one convolution layer per entry.
        hidden_units: MLP layer sizes. ``hidden_units[0]`` is read as the
            number of feature maps per window; a private copy then gets
            ``hidden_units[0] = feature_maps * len(window_sizes)``.
        dropout_rate: dropout rates forwarded to MLPDropout.
        shuffle_batch: visit minibatches in random order each epoch and
            report precision/recall/F1; otherwise visit them in order and
            report only the test error.
        n_epochs: number of passes over the training data.
        batch_size: minibatch size; the training set is padded to a multiple.
        lr_decay, sqr_norm_lim: forwarded to ``sgd_updates_adadelta``.
        conv_non_linear: forwarded to LeNetConvPoolLayer as ``non_linear``.
        activations: forwarded to MLPDropout.
        non_static: when true, ``Words`` is trained together with the model.
        proportion: unused; kept for interface compatibility.

    Returns:
        None. Progress is printed, and the test predictions are written to
        'pred_entity.txt' whenever the F1 score exceeds the best seen so far
        (initial threshold 0.25).
    """
    rng = numpy.random.RandomState(3435)

    # Work on copies: hidden_units[0] is overwritten below, which would
    # otherwise modify the caller's list or the shared default list.
    window_sizes = list(window_sizes)
    hidden_units = list(hidden_units)

    sen_length = len(datasets[0][0]) - 1  # row length minus the label column
    filter_w = word_size  # filters span the whole embedding width
    feature_maps = hidden_units[0]
    filter_shapes = [(feature_maps, 1, filter_h, filter_w)
                     for filter_h in window_sizes]
    pool_sizes = [(sen_length - filter_h + 1, 1) for filter_h in window_sizes]
    parameters = [("image shape", sen_length, word_size),
                  ("filter shape", filter_shapes),
                  ("hidden_units", hidden_units),
                  ("dropout", dropout_rate),
                  ("batch_size", batch_size),
                  ("non_static", non_static),
                  ("learn_decay", lr_decay),
                  ("conv_non_linear", conv_non_linear),
                  ("sqr_norm_lim", sqr_norm_lim),
                  ("shuffle_batch", shuffle_batch)]
    # Single-argument print(...) behaves the same under Python 2 and 3.
    print(parameters)

    Words = theano.shared(value=wordvec, name='Words')
    zero_vec_tensor = T.vector()
    zero_vec = numpy.zeros(word_size, dtype=theano.config.floatX)
    # Compiled function that overwrites row 0 of Words with the given vector.
    set_zero = theano.function([zero_vec_tensor],
                               updates=[(Words, T.set_subtensor(Words[0,:], zero_vec_tensor))])

    x = T.matrix('x')
    y = T.ivector('y')
    index = T.lscalar('index')

    # Look up one embedding row per entry of x and arrange the result as
    # (rows of x, 1, columns of x, embedding width).
    layer0_input = Words[T.cast(x.flatten(), dtype="int32")].reshape(
        (x.shape[0], 1, x.shape[1], Words.shape[1]))
    conv_layers = []
    layer1_inputs = []
    for filter_shape, pool_size in zip(filter_shapes, pool_sizes):
        conv_layer = LeNetConvPoolLayer(rng, input=layer0_input,
                                        image_shape=(batch_size, 1, sen_length, word_size),
                                        filter_shape=filter_shape, poolsize=pool_size,
                                        non_linear=conv_non_linear)
        conv_layers.append(conv_layer)
        layer1_inputs.append(conv_layer.output.flatten(2))
    layer1_input = T.concatenate(layer1_inputs, 1)
    # The MLP input width is the concatenation of all windows' feature maps.
    hidden_units[0] = feature_maps * len(window_sizes)
    classifier = MLPDropout(rng,
                            input=layer1_input,
                            layer_sizes=hidden_units,
                            activations=activations,
                            dropout_rates=dropout_rate)
    # Copy so that extending the list does not modify classifier.params.
    params = list(classifier.params)
    for conv_layer in conv_layers:
        params += conv_layer.params
    if non_static:
        params += [Words]

    cost = classifier.negative_log_likelihood(y)
    dropout_cost = classifier.dropout_negative_log_likelihood(y)
    grad_updates = sgd_updates_adadelta(params, dropout_cost,
                                        lr_decay, 1e-6, sqr_norm_lim)

    numpy.random.seed(3435)

    test_set = datasets[1]
    train_set = datasets[0]

    remainder = train_set.shape[0] % batch_size
    if remainder > 0:
        # Pad with rows of the shuffled training set so that the number of
        # training rows is an exact multiple of batch_size. The modulo makes
        # the padding wrap around when fewer rows exist than are needed.
        extra_data_num = batch_size - remainder
        train_set = numpy.random.permutation(train_set)
        pad_rows = numpy.arange(extra_data_num) % train_set.shape[0]
        new_data = numpy.append(train_set, train_set[pad_rows], axis=0)
    else:
        new_data = train_set
    train_set_x, train_set_y = shared_dataset((new_data[:,1:], new_data[:,0]))
    test_set_x, test_set_y = shared_dataset((test_set[:,1:], test_set[:,0]))
    # Floor division: exact after padding, and an int under Python 2 and 3.
    n_train_batches = new_data.shape[0] // batch_size

    train_model = theano.function([index], cost, updates=grad_updates,
          givens={
            x: train_set_x[index*batch_size:(index+1)*batch_size],
            y: train_set_y[index*batch_size:(index+1)*batch_size]})

    # Prediction graph over the whole test set, reusing the trained layers.
    test_pred_layers = []
    test_size = test_set_x.shape[0].eval()
    test_layer0_input = Words[T.cast(x.flatten(), dtype="int32")].reshape(
        (x.shape[0], 1, x.shape[1], Words.shape[1]))
    for conv_layer in conv_layers:
        test_layer0_output = conv_layer.predict(test_layer0_input, test_size)
        test_pred_layers.append(test_layer0_output.flatten(2))
    test_layer1_input = T.concatenate(test_pred_layers, 1)
    test_y_pred = classifier.predict(test_layer1_input)
    test_error = T.mean(T.neq(test_y_pred, y))
    test_model_all = theano.function(inputs=[x,y], outputs=[test_error,test_y_pred])

    # The test data never changes during training, so fetch it once.
    test_x_values = test_set_x.get_value(borrow=True)
    test_y_values = test_set_y.eval()

    epoch = 0
    max_f1_score = 0.25
    while epoch < n_epochs:
        epoch += 1
        if shuffle_batch:
            batch_order = numpy.random.permutation(range(n_train_batches))
        else:
            batch_order = range(n_train_batches)
        for minibatch_index in batch_order:
            train_model(minibatch_index)
            set_zero(zero_vec)

        # Evaluate in both modes; the ordered mode used to print an
        # undefined `error` and failed with a NameError.
        error, prediction = test_model_all(x=test_x_values, y=test_y_values)
        if shuffle_batch:
            precision, recall, f1_score = evaluate.evaluate_multi_class_seedev(
                prediction=prediction,
                answer=test_y_values,
                claz_count=claz_count)
            print('epoch:%d,error:%.3f,precision:%.4f,  recall:%.4f,  f1_score:%.4f'%(epoch,error,precision,recall,f1_score))
            if f1_score > max_f1_score:
                max_f1_score = f1_score
                write_matrix_to_file(prediction,'pred_entity.txt')
        else:
            print('epoch:%d,error:%f'%(epoch,error))
def train_nn(datasets,
             wordvec,
             word_size=200,
             hidden_units=[2000,1000,2],
             dropout_rate=[0,0,0],
             shuffle_batch=True,
             n_epochs=3000,
             batch_size=256,
             init_learning_rate=0.4,
             adadelta=True,
             lr_decay=0.95,
             sqr_norm_lim=9,
             activations=[Tanh,Tanh,Tanh],
             non_static=True,
             use_valid_set=False,
             proportion=1):
    """Train an MLPDropout classifier on concatenated word embeddings.

    Every entry of an input row is used as a row index into the embedding
    matrix; the looked-up vectors are concatenated and fed to the MLP.

    Args:
        datasets: two or three matrices. ``datasets[0]`` is the training
            set and the last one the test set; with three entries the
            middle one is a validation set. Column 0 of each row is used
            as the label and the remaining columns as embedding indices.
        wordvec: initial value of the shared embedding matrix ``Words``.
            Row 0 is reset to zeros after every update (presumably the
            padding word -- confirm against the data preparation code).
        word_size: embedding width (length of the zero vector for row 0).
        hidden_units, dropout_rate, activations: forwarded to MLPDropout.
        shuffle_batch: visit minibatches in random order each epoch and
            print the mean cost; otherwise visit them in order and print
            every minibatch cost.
        n_epochs: number of passes over the training data.
        batch_size: minibatch size; the training set is padded to a multiple.
        init_learning_rate: forwarded to ``sgd_updates`` when ``adadelta``
            is false.
        adadelta: select ``sgd_updates_adadelta`` instead of ``sgd_updates``.
        lr_decay, sqr_norm_lim: forwarded to ``sgd_updates_adadelta``.
        non_static: when true, ``Words`` is trained together with the model.
        use_valid_set: with two datasets, hold out the rows after the first
            ``round(n_batches * proportion)`` batches. Forced to True when
            three datasets are given.
        proportion: fraction of batches kept for training when a validation
            split is carved out of the training data.

    Returns:
        None. Progress is printed each epoch; after training the test
        predictions are written to 'pred.txt' and the test labels to
        'real.txt'.

    NOTE(review): the validation rows are separated from the training data
    but are not evaluated anywhere in this function.
    """
    rng = numpy.random.RandomState(3435)
    Words = theano.shared(value=wordvec, name='Words')
    zero_vec_tensor = T.vector()
    zero_vec = numpy.zeros(word_size, dtype=theano.config.floatX)
    # Compiled function that overwrites row 0 of Words with the given vector.
    set_zero = theano.function([zero_vec_tensor],
                               updates=[(Words, T.set_subtensor(Words[0,:], zero_vec_tensor))])
    x = T.matrix('x')
    y = T.ivector('y')
    index = T.lscalar('index')

    # Look up one embedding row per entry of x and concatenate them per row.
    layer0_input = Words[T.cast(x.flatten(), dtype="int32")].reshape(
        (x.shape[0], x.shape[1]*Words.shape[1]))
    classifier = MLPDropout(rng,
                            input=layer0_input,
                            layer_sizes=hidden_units,
                            activations=activations,
                            dropout_rates=dropout_rate)
    # Copy so that appending Words does not modify classifier.params.
    params = list(classifier.params)
    if non_static:
        params.append(Words)

    cost = classifier.negative_log_likelihood(y)
    dropout_cost = classifier.dropout_negative_log_likelihood(y)
    if adadelta:
        grad_updates = sgd_updates_adadelta(params, dropout_cost,
                                            lr_decay, 1e-6, sqr_norm_lim)
    else:
        grad_updates = sgd_updates(params, dropout_cost, init_learning_rate)

    numpy.random.seed(3435)
    full_train = datasets[0]
    remainder = full_train.shape[0] % batch_size
    if remainder > 0:
        # Pad with rows from a shuffled copy so that the number of training
        # rows is an exact multiple of batch_size. The modulo makes the
        # padding wrap around when fewer rows exist than are needed.
        extra_data_num = batch_size - remainder
        shuffled = numpy.random.permutation(full_train)
        pad_rows = numpy.arange(extra_data_num) % shuffled.shape[0]
        new_data = numpy.append(full_train, shuffled[pad_rows], axis=0)
    else:
        new_data = full_train

    new_data = numpy.random.permutation(new_data)
    # Floor division: exact after padding, and an int under Python 2 and 3.
    n_batches = new_data.shape[0] // batch_size

    if len(datasets) == 3:
        use_valid_set = True
        train_set = new_data
        test_source = datasets[2]
    else:
        test_source = datasets[1]
        if use_valid_set:
            # Keep the leading batches for training; the rest is held out.
            split = int(numpy.round(n_batches * proportion)) * batch_size
            train_set = new_data[:split, :]
        else:
            train_set = new_data
    train_set_x, train_set_y = shared_dataset((train_set[:,1:], train_set[:,0]))
    test_set_x = test_source[:,1:]
    test_set_y = numpy.asarray(test_source[:,0], "int32")

    # Count batches from the rows actually used for training; counting from
    # new_data would index past train_set_x after a validation split.
    n_train_batches = train_set.shape[0] // batch_size

    train_model = theano.function([index], cost, updates=grad_updates,
          givens={
            x: train_set_x[index*batch_size:(index+1)*batch_size],
            y: train_set_y[index*batch_size:(index+1)*batch_size]})

    # First element of classifier.f_score(y, 1), kept as a one-element tuple.
    f_score_outputs = (classifier.f_score(y, 1)[0],)
    test_model = theano.function([], f_score_outputs,
                                 givens={
                                 x: test_set_x,
                                 y: test_set_y})

    epoch = 0
    while epoch < n_epochs:
        epoch += 1
        if shuffle_batch:
            epoch_costs = []
            for minibatch_index in numpy.random.permutation(range(n_train_batches)):
                epoch_costs.append(train_model(minibatch_index))
                set_zero(zero_vec)
            # Two trailing spaces reproduce the spacing of the former
            # two-statement Python 2 print.
            prefix = 'epoch:%d, cost value:%f,  '%(epoch,numpy.mean(epoch_costs))
        else:
            for minibatch_index in range(n_train_batches):
                print(train_model(minibatch_index))
                set_zero(zero_vec)
            prefix = ''
        print(prefix + '%.2f,' % tuple(test_model()))

    # Final predictions on the test set with the trained parameters.
    test_layer0_input = Words[T.cast(test_set_x.flatten(), dtype="int32")].reshape(
        (test_set_x.shape[0], test_set_x.shape[1]*Words.shape[1]))
    t_pred = classifier.predict(test_layer0_input)
    write_matrix_to_file(t_pred.eval(),'pred.txt')
    write_matrix_to_file(test_set_y,'real.txt')