# Set up the symbolic forward pass of the RNN tagger on top of the
# 'senna_50' word embeddings.
import os

import numpy as np
import theano
import theano.tensor as T

import lxmls.deep_learning.rnn as rnns

#
# DEFINE MODEL
#

# Extract word embeddings for the vocabulary used. The embeddings file is
# downloaded first when it is not already present on disk.
senna_path = 'data/senna_50'
if not os.path.isfile(senna_path):
    rnns.download_embeddings('senna_50', senna_path)
E = rnns.extract_embeddings(senna_path, train_seq.x_dict)

# CONFIG
n_words, n_emb = E.shape[0], E.shape[1]  # Number of words, embedding size
n_hidd = 20                              # Size of the recurrent layer
n_tags = len(train_seq.y_dict.keys())    # Number of POS tags

# SYMBOLIC VARIABLES
_x = T.ivector('x')  # Input words indices

# Define the RNN and build its forward output for the symbolic input
rnn = rnns.RNN(E, n_hidd, n_tags)
_p_y = rnn._forward(_x)
import numpy as np
import theano
import theano.tensor as T
import lxmls.deep_learning.rnn as rnns
import os

# ---------------------------------------------------------------------------
# DEFINE MODEL
# ---------------------------------------------------------------------------

# Word embeddings for the vocabulary used; fetched on demand when the
# local file is missing.
embeddings_on_disk = os.path.isfile('data/senna_50')
if not embeddings_on_disk:
    rnns.download_embeddings('senna_50', 'data/senna_50')
E = rnns.extract_embeddings('data/senna_50', train_seq.x_dict)

# CONFIG
# Number of words
n_words = E.shape[0]
# Size of word embeddings
n_emb = E.shape[1]
# Size of the recurrent layer
n_hidd = 20
# Number of POS tags
n_tags = len(train_seq.y_dict.keys())

# SYMBOLIC VARIABLES
# Input words indices
_x = T.ivector('x')

# Define the RNN
rnn = rnns.RNN(E, n_hidd, n_tags)

# Forward
_p_y = rnn._forward(_x)
# Load the POS-tagging corpus splits and the word embeddings for the
# training vocabulary, then set the RNN sizes and pick one sample to test.

# Prefix for every data path: "<root><os.sep>" when a root directory is
# given, empty otherwise so the paths stay relative.
root_os_sep = root + os.sep if root else ''
data_dir = root_os_sep + "data" + os.sep

# All three splits are read with the same sentence-length and
# sentence-count limits.
corpus = pcc.PostagCorpus()
train_seq = corpus.read_sequence_list_conll(
    data_dir + "train-02-21.conll", max_sent_len=15, max_nr_sent=1000)
test_seq = corpus.read_sequence_list_conll(
    data_dir + "test-23.conll", max_sent_len=15, max_nr_sent=1000)
dev_seq = corpus.read_sequence_list_conll(
    data_dir + "dev-22.conll", max_sent_len=15, max_nr_sent=1000)

import lxmls.deep_learning.rnn as rnns

# Download the embeddings only when the file is not already on disk.
senna_path = data_dir + 'senna_50'
if not os.path.isfile(senna_path):
    rnns.download_embeddings('senna_50', senna_path)

# Extract the embeddings once for the training vocabulary. A second call
# with identical arguments would only rebuild the same E.
# NOTE(review): assumes extract_embeddings has no side effects that a
# repeated call relies on -- confirm.
E = rnns.extract_embeddings(senna_path, train_seq.x_dict)

# CONFIG
n_words = E.shape[0]                   # Number of words
n_emb = E.shape[1]                     # Size of word embeddings
n_hidd = 20                            # Size of the recurrent layer
n_tags = len(train_seq.y_dict.keys())  # Number of POS tags
seed = 0                               # seed to initialize rnn parameters

# Test NumpyRNN() with the sample=0
sample = 0                  # sample to be tested
x0 = train_seq[sample].x    # first sample input (vector of integers)
y0 = train_seq[sample].y    # first sample output (vector of integers)