Example #1
targetTypesFile=config['typefile']
vectorFile=config['ent_vectors']
learning_rate = float(config['lrate'])
batch_size = 1  # overrides int(config['batchsize']): fixed to one example per mini-batch
n_epochs = int(config['nepochs'])
num_neg = int(config['numneg'])
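# The `config` dict above comes from a key=value configuration file that is not
# included in these snippets.  Based on the keys read here, it plausibly looks
# something like the following (all values are illustrative, not the project's):
#
#   typefile=/path/to/target_types.txt
#   ent_vectors=/path/to/entity_word2vec_vectors.txt
#   lrate=0.01
#   batchsize=100
#   nepochs=20
#   numneg=5
#   outtype=softmax
#   typecosine=true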

testfile=sys.argv[2]
outf=sys.argv[3]
use_tanh_out = False
outputtype = config['outtype'] #hinge or softmax
usetypecosine = False
if 'typecosine' in config:
    usetypecosine = utils.str_to_bool(config['typecosine'])
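# utils.str_to_bool is not shown in these snippets; a minimal, hypothetical
# sketch of such a helper (the real utils version may differ) would be:
def _str_to_bool_sketch(s):
    # treat 'true', '1' and 'yes' (case-insensitive) as True, everything else as False
    return s.strip().lower() in ('true', '1', 'yes')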

(t2ind, n_targets, wordvectors, vectorsize, typefreq_traindev) = utils.loadTypesAndVectors(targetTypesFile, vectorFile)
(rvt, input_matrix_test, iet, resvectstnall, ntrn) = utils.fillOnlyEntityData(testfile, vectorsize, wordvectors, t2ind, n_targets, upto=-1, ds='test', binoutvec=True)

# train network
rng = numpy.random.RandomState(23455)
if usetypecosine:
    print 'using cosine(e,t) as another input feature'
    typevecmatrix = utils.buildtypevecmatrix(t2ind, wordvectors, vectorsize) # a matrix with size: 102 * dim 
    e2simmatrix_test = utils.buildcosinematrix(input_matrix_test, typevecmatrix)
    input_matrix_test = utils.extend_in_matrix(input_matrix_test, e2simmatrix_test)
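# buildtypevecmatrix, buildcosinematrix and extend_in_matrix live in utils and
# are not shown.  A rough, hypothetical sketch of the assumed behaviour --
# all pairwise cosine similarities between the entity vectors and the type
# vectors, appended to the input matrix as extra feature columns (assuming the
# matrices are numpy arrays) -- could be:
def _cosine_matrix_sketch(ent_matrix, type_matrix):
    ent_norm = ent_matrix / (numpy.linalg.norm(ent_matrix, axis=1, keepdims=True) + 1e-8)
    typ_norm = type_matrix / (numpy.linalg.norm(type_matrix, axis=1, keepdims=True) + 1e-8)
    return numpy.dot(ent_norm, typ_norm.T)            # shape: n_entities x n_targets

def _extend_in_matrix_sketch(in_matrix, extra_features):
    return numpy.hstack([in_matrix, extra_features])  # append the cosine features as extra columns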

dt = theano.config.floatX  # @UndefinedVariable

index = T.lscalar()  # index to a [mini]batch
x = T.matrix('x')  # the data is presented as a matrix of entity feature vectors
y = T.imatrix('y')  # the labels are presented as a binary type-indicator matrix
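# The snippet ends with the symbolic variables.  Following the pattern of
# Example #2 below, the test matrices built above would typically be wrapped in
# Theano shared variables so that `x` and `y` can later be bound to mini-batch
# slices via `givens` when compiling a test function:
test_set_x = theano.shared(numpy.matrix(input_matrix_test, dtype=dt))
test_set_y = theano.shared(numpy.matrix(resvectstnall, dtype=numpy.dtype(numpy.int32)))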
Example #2
l_reg = ''
l_weight = 0.000001
if 'loss_reg' in config:
    l_reg = config['loss_reg']
    l_weight = float(config['loss_weight'])

use_tanh_out = False
if 'tanh' in config:
    use_tanh_out = True     

outputtype = config['outtype'] #hinge or softmax
leftsize = int(config['left'])
rightsize = int(config['right'])
sum_window = int(config['sumwindow'])
max_window = int(config['maxwindow'])
slotposition = max_window // 2  # integer division: the target slot sits in the middle of the window

label_type = config['target'] #nt or ct

(typeIndMap, n_targets, wordvecs, vectorsize, typefreq_traindev) = utils.loadTypesAndVectors(targetTypesFile, vectorFile, -1)
logger.info('word2vec vectors are loaded')
(testcontexts,resultVectorTest, resultVectorTestAll) = utils.read_lines_data(testsetfile, typeIndMap, max_window, label_type, -1)
logger.info("number of test examples: %d", len(testcontexts))

inputMatrixTest = utils.adeltheanomatrix_flexible(slotposition, vectorsize, testcontexts, wordvecs, leftsize, rightsize, sum_window, useSum)
contextsize = leftsize + rightsize + 1
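# adeltheanomatrix_flexible is a utils helper that is not shown here.  A very
# rough, hypothetical sketch of the underlying idea -- one row per example,
# built by concatenating the embeddings of `leftsize` words before and
# `rightsize` words after the target slot (the real helper also handles the
# summed window and out-of-vocabulary words) -- assuming wordvecs maps words to
# numpy vectors:
def _context_matrix_sketch(contexts, wordvecs, vectorsize, slot, leftsize, rightsize):
    rows = []
    for tokens in contexts:                        # tokens: one context window, slot in the middle
        window = tokens[slot - leftsize: slot + rightsize + 1]
        vecs = [wordvecs.get(w, numpy.zeros(vectorsize)) for w in window]
        rows.append(numpy.concatenate(vecs))       # contextsize * vectorsize values per row
    return numpy.asarray(rows, dtype='float32')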
################# for memory ############
testcontexts = []; wordvecs = []; 
##################### the network #######################
test_set_x = theano.shared(numpy.matrix(inputMatrixTest, dtype=theano.config.floatX))  # @UndefinedVariable
test_set_y = theano.shared(numpy.matrix(resultVectorTestAll, dtype=numpy.dtype(numpy.int32)))

rng = numpy.random.RandomState(23455)

n_test_batches = test_set_x.get_value(borrow=True).shape[0]
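# The snippet stops before the network itself is built.  A hypothetical
# stand-in model (a single linear layer) just to illustrate the usual Theano
# recipe of binding `x` to slices of the shared test matrix via `givens`; with
# batch_size = 1, as in Example #1, n_test_batches above equals the number of
# test examples:
import theano.tensor as T

batch_size = 1
index = T.lscalar('index')
x = T.matrix('x')
n_in = test_set_x.get_value(borrow=True).shape[1]
W = theano.shared(numpy.zeros((n_in, n_targets), dtype=theano.config.floatX), name='W')
scores = T.dot(x, W)                               # one score per target type

test_model = theano.function(
    [index], scores,
    givens={x: test_set_x[index * batch_size:(index + 1) * batch_size]})
predictions = [test_model(i) for i in range(n_test_batches)]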
Example #3
use_tanh_out = False
if 'tanh' in config:
    use_tanh_out = True

outputtype = config['outtype']  #hinge or softmax
leftsize = int(config['left'])
rightsize = int(config['right'])
sum_window = int(config['sumwindow'])

label_type = config['target']  #nt or ct

max_window = int(config['maxwindow'])
slotposition = max_window // 2  # integer division: the target slot sits in the middle of the window
logger.info('slotposition is %d', slotposition)
(typeIndMap, n_targets, wordvecs, vectorsize,
 typefreq_traindev) = utils.loadTypesAndVectors(targetTypesFile, vectorFile)

(traincontexts, resultVectorTrain,
 resultVectorTrainAll) = utils.read_lines_data(trainfile, typeIndMap,
                                               max_window, label_type, -1)
logger.info("number of training examples: %d", len(traincontexts))
inputMatrixTrain = utils.adeltheanomatrix_flexible(slotposition, vectorsize,
                                                   traincontexts, wordvecs,
                                                   leftsize, rightsize,
                                                   sum_window, useSum)

(contextlistDev, resultVectorDev,
 resultVectorDevAll) = utils.read_lines_data(devfile, typeIndMap, max_window,
                                             label_type, -1)
logger.info("number of validation examples: %d", len(contextlistDev))
Example #4
l_reg = ''
l_weight = 0.000001
if 'loss_reg' in config:
    l_reg = config['loss_reg']
    l_weight = float(config['loss_weight'])
    
use_tanh_out = False
if 'tanh' in config:
    use_tanh_out = True  
outputtype = config['outtype'] #hinge or softmax
usetypecosine = False
if 'typecosine' in config:
    usetypecosine = utils.str_to_bool(config['typecosine'])
    
upto = -1
(t2ind, n_targets, wordvectors, vectorsize, typefreq_traindev) = utils.loadTypesAndVectors(targetTypesFile, vectorFile, upto=upto)

(rvt, input_matrix_train, iet, resvectrnall, ntrn) = utils.fillOnlyEntityData(trainfile, vectorsize, wordvectors, t2ind, n_targets, upto=upto, binoutvec=True)
print "number of training examples: " + str(len(iet))

(rvd, input_matrix_dev, ied, resvecdevall, ntdev) = utils.fillOnlyEntityData(devfile, vectorsize, wordvectors, t2ind, n_targets, upto=upto, binoutvec=True)
print "number of validation examples: " + str(len(ied))

if usetypecosine:
    print 'using cosine(e,t) as another input feature'
    typevecmatrix = utils.buildtypevecmatrix(t2ind, wordvectors, vectorsize) # a matrix with size: 102 * dim 
    e2simmatrix_train = utils.buildcosinematrix(input_matrix_train, typevecmatrix)
    e2simmatrix_dev = utils.buildcosinematrix(input_matrix_dev, typevecmatrix)
    input_matrix_train = utils.extend_in_matrix(input_matrix_train, e2simmatrix_train)
    input_matrix_dev = utils.extend_in_matrix(input_matrix_dev, e2simmatrix_dev)
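# The examples stop before the model is defined.  A rough, hypothetical sketch
# of how the `outputtype` ('hinge' or 'softmax') and `use_tanh_out` flags above
# could select the output activation and loss in Theano -- an illustration
# only, not the project's actual network code:
import numpy
import theano
import theano.tensor as T

x = T.matrix('x')                                  # entity feature vectors
y = T.imatrix('y')                                 # binary type-indicator labels
n_in = numpy.asarray(input_matrix_train).shape[1]
W = theano.shared(numpy.zeros((n_in, n_targets), dtype=theano.config.floatX), name='W')
b = theano.shared(numpy.zeros(n_targets, dtype=theano.config.floatX), name='b')

scores = T.dot(x, W) + b
if use_tanh_out:
    scores = T.tanh(scores)

if outputtype == 'softmax':
    p_y_given_x = T.nnet.softmax(scores)
    cost = T.nnet.categorical_crossentropy(p_y_given_x, y).mean()
else:                                              # 'hinge': margin loss over labels mapped to {-1, +1}
    cost = T.mean(T.maximum(0., 1. - (2. * y - 1.) * scores))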