# Example 1
import os

import numpy as np
import theano
import theano.tensor as T
import lxmls.deep_learning.rnn as rnns

#
# DEFINE MODEL
#

# Extract word embeddings for the vocabulary used. Download the embeddings
# first if they are not available locally.
if not os.path.isfile('data/senna_50'):
    rnns.download_embeddings('senna_50', 'data/senna_50')
# NOTE(review): train_seq is expected to be defined earlier in the file.
E = rnns.extract_embeddings('data/senna_50', train_seq.x_dict)

# CONFIG
n_words = E.shape[0]  # Number of words in the vocabulary
n_emb = E.shape[1]  # Size of the word embeddings
n_hidd = 20  # Size of the recurrent layer
n_tags = len(train_seq.y_dict)  # Number of POS tags (len(dict) == number of keys)
# SYMBOLIC VARIABLES
_x = T.ivector('x')  # Input word indices
# Define the RNN
rnn = rnns.RNN(E, n_hidd, n_tags)
# Forward pass: symbolic tag probabilities for the input sequence
_p_y = rnn._forward(_x)

#
# DEFINE TRAINING
# Example 2
import os

import numpy as np
import theano
import theano.tensor as T
import lxmls.deep_learning.rnn as rnns

#
# DEFINE MODEL
#

# Extract word embeddings for the vocabulary used. Download the embeddings
# first if they are not available locally.
if not os.path.isfile('data/senna_50'):
    rnns.download_embeddings('senna_50', 'data/senna_50')
# NOTE(review): train_seq is expected to be defined earlier in the file.
E = rnns.extract_embeddings('data/senna_50', train_seq.x_dict)

# CONFIG
n_words = E.shape[0]  # Number of words in the vocabulary
n_emb = E.shape[1]  # Size of the word embeddings
n_hidd = 20  # Size of the recurrent layer
n_tags = len(train_seq.y_dict)  # Number of POS tags (len(dict) == number of keys)
# SYMBOLIC VARIABLES
_x = T.ivector('x')  # Input word indices
# Define the RNN
rnn = rnns.RNN(E, n_hidd, n_tags)
# Forward pass: symbolic tag probabilities for the input sequence
_p_y = rnn._forward(_x)

#
# DEFINE TRAINING
# Example 3
    root_os_sep = root + os.sep
else:
    root_os_sep = ''
corpus = pcc.PostagCorpus()
# Load the CoNLL POS-tagging splits; sentence length and corpus size are
# capped to keep the exercise fast.
train_seq = corpus.read_sequence_list_conll(root_os_sep + "data" + os.sep + "train-02-21.conll",
                                            max_sent_len=15, max_nr_sent=1000)
test_seq = corpus.read_sequence_list_conll(root_os_sep + "data" + os.sep + "test-23.conll",
                                           max_sent_len=15, max_nr_sent=1000)
dev_seq = corpus.read_sequence_list_conll(root_os_sep + "data" + os.sep + "dev-22.conll",
                                          max_sent_len=15, max_nr_sent=1000)

import lxmls.deep_learning.rnn as rnns

# Build the embeddings path once instead of repeating the concatenation.
embeddings_path = root_os_sep + 'data' + os.sep + 'senna_50'
# Download the senna_50 embeddings if they are not available locally.
if not os.path.isfile(embeddings_path):
    rnns.download_embeddings('senna_50', embeddings_path)
# Extract embeddings for the training vocabulary (the original called this
# twice in a row; once is sufficient).
E = rnns.extract_embeddings(embeddings_path, train_seq.x_dict)

# CONFIG
n_words = E.shape[0]  # Number of words in the vocabulary
n_emb = E.shape[1]  # Size of the word embeddings
n_hidd = 20  # Size of the recurrent layer
n_tags = len(train_seq.y_dict)  # Number of POS tags (len(dict) == number of keys)
seed = 0  # Seed to initialize the RNN parameters

# Test NumpyRNN() with the sample at index 0
sample = 0  # Index of the sample to be tested
x0 = train_seq[sample].x  # First sample input (vector of integers)
y0 = train_seq[sample].y  # First sample output (vector of integers)

rnn = rnns.NumpyRNN(E, n_hidd, n_tags, seed=seed)