Example #1
    def __init__(self,
                 name,
                 input_dim,
                 hidden_dim,
                 opts,
                 update_fn,
                 h0,
                 inputs,
                 context=None,
                 context_dim=None):
        self.name_ = name
        self.update_fn = update_fn
        self.h0 = h0
        self.inputs = inputs  # input sequence
        self.context = context  # additional context to add at each timestep of input

        # params for standard recurrent step
        self.Uh = util.sharedMatrix(hidden_dim,
                                    hidden_dim,
                                    'Uh',
                                    orthogonal_init=True)
        self.Wh = util.sharedMatrix(hidden_dim,
                                    input_dim,
                                    'Wh',
                                    orthogonal_init=True)
        self.bh = util.shared(util.zeros((hidden_dim, )), 'bh')

        # params for context; if applicable
        if self.context is not None:
            self.Wch = util.sharedMatrix(hidden_dim,
                                         context_dim,
                                         'Wch',
                                         orthogonal_init=True)

        # params for reset gate; initial bias to not reset
        self.Ur = util.sharedMatrix(hidden_dim,
                                    hidden_dim,
                                    'Ur',
                                    orthogonal_init=True)
        self.Wr = util.sharedMatrix(hidden_dim,
                                    input_dim,
                                    'Wr',
                                    orthogonal_init=True)
        self.br = util.shared(np.asarray([opts.gru_initial_bias] * hidden_dim),
                              'br')

        # params for carry gate; initial bias to never carry h_t_minus_1
        self.Uz = util.sharedMatrix(hidden_dim,
                                    hidden_dim,
                                    'Uz',
                                    orthogonal_init=True)
        self.Wz = util.sharedMatrix(hidden_dim,
                                    input_dim,
                                    'Wz',
                                    orthogonal_init=True)
        self.bz = util.shared(np.asarray([opts.gru_initial_bias] * hidden_dim),
                              'bz')
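
For reference, these parameters correspond to a standard GRU step: Uh/Wh/bh for the candidate state, Ur/Wr/br for the reset gate, Uz/Wz/bz for the carry gate. Below is a minimal numpy sketch of one step; the tanh/sigmoid choices, the direction of the carry blend, and the helper names (sigmoid, gru_step) are assumptions, since the class's actual step function is not shown here.

import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def gru_step(x_t, h_prev, Uh, Wh, bh, Ur, Wr, br, Uz, Wz, bz):
    # reset gate: how much of the previous state feeds the candidate
    r = sigmoid(np.dot(Ur, h_prev) + np.dot(Wr, x_t) + br)
    # candidate hidden state (assumed tanh nonlinearity)
    h_tilde = np.tanh(np.dot(Uh, r * h_prev) + np.dot(Wh, x_t) + bh)
    # carry gate: how much of the previous state is carried through
    z = sigmoid(np.dot(Uz, h_prev) + np.dot(Wz, x_t) + bz)
    # blend direction is an assumption; it matches the "carry h_t_minus_1" naming
    return z * h_prev + (1.0 - z) * h_tilde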
Example #2
 def __init__(self,
              vocab_size,
              embedding_dim,
              initial_embeddings_file=None,
              train_embeddings=True):
     if not train_embeddings and initial_embeddings_file is None:
         print >> sys.stderr, "WARNING: embeddings will not be trained and no initial embeddings were provided"
     self.train_embeddings = train_embeddings
     if initial_embeddings_file:
         e = np.load(initial_embeddings_file)
         assert e.shape[0] == vocab_size, \
             "vocab size mismatch? loaded=%s expected=%s" % (e.shape[0], vocab_size)
         # TODO: the code could handle this, but we just don't want --embedding-dim
         # set when using initial embeddings
         assert e.shape[1] == embedding_dim, \
             "dimensionality config error: loaded embeddings are %sd but --embedding-dim is %s" % (
                 e.shape[1], embedding_dim)
         assert e.dtype == np.float32, "expected float32 embeddings, got %s" % e.dtype
         self.shared_embeddings = util.shared(e, 'tied_embeddings')
     else:
         self.shared_embeddings = util.sharedMatrix(vocab_size,
                                                    embedding_dim,
                                                    'tied_embeddings',
                                                    orthogonal_init=True)
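
The assertions above imply that initial_embeddings_file holds a float32 numpy array of shape (vocab_size, embedding_dim) saved with np.save. A minimal sketch of producing such a file; the filename and sizes are illustrative only.

import numpy as np

vocab_size, embedding_dim = 10000, 128  # illustrative sizes
embeddings = np.random.randn(vocab_size, embedding_dim).astype(np.float32)
np.save("initial_embeddings.npy", embeddings)  # hypothetical filename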
Example #3
    def __init__(self, inp, n_labels, n_hidden_previous, update_fn,
                 training=None, keep_prob=None):
        if isinstance(inp, list):
            self.input = T.concatenate(inp)
            input_size = len(inp) * n_hidden_previous
        else:
            self.input = inp
            input_size = n_hidden_previous

        if training is not None:
            assert keep_prob is not None
            self.input = dropout(self.input, training, keep_prob)

        self.update_fn = update_fn

        # input -> hidden (sized somewhere between size of input & softmax)
        n_hidden = int(math.sqrt(input_size * n_labels))
        print "concat sizing %s -> %s -> %s" % (input_size, n_hidden, n_labels)
        self.Wih = util.sharedMatrix(input_size, n_hidden, 'Wih')
        self.bh = util.shared(util.zeros((1, n_hidden)), 'bh')
        # hidden -> softmax
        self.Whs = util.sharedMatrix(n_hidden, n_labels, 'Whs')
        self.bs = util.shared(util.zeros((1, n_labels)), 'bs')
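
These parameters describe a single hidden layer sized as the geometric mean of the (possibly concatenated) input size and the number of labels, followed by a softmax over labels. Below is a minimal numpy sketch of the implied forward pass; the tanh nonlinearity and the helper names (softmax, forward) are assumptions, since the class's activation code is not shown here.

import numpy as np

def softmax(x):
    e = np.exp(x - x.max(axis=-1, keepdims=True))
    return e / e.sum(axis=-1, keepdims=True)

def forward(x, Wih, bh, Whs, bs):
    # input -> hidden (assumed tanh nonlinearity)
    h = np.tanh(np.dot(x, Wih) + bh)
    # hidden -> distribution over labels
    return softmax(np.dot(h, Whs) + bs)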
Example #4
    def __init__(self,
                 name,
                 input_dim,
                 hidden_dim,
                 opts,
                 update_fn,
                 h0,
                 inputs,
                 context=None,
                 context_dim=None):
        self.name_ = name
        self.update_fn = update_fn
        self.h0 = h0
        self.inputs = inputs  # input sequence
        self.context = context  # additional context to add at each timestep of input

        # hidden -> hidden
        self.Uh = util.sharedMatrix(hidden_dim,
                                    hidden_dim,
                                    'Uh',
                                    orthogonal_init=True)

        # embedded input -> hidden
        self.Wh = util.sharedMatrix(hidden_dim,
                                    input_dim,
                                    'Wh',
                                    orthogonal_init=True)

        # context -> hidden (if applicable)
        if self.context is not None:
            self.Whc = util.sharedMatrix(hidden_dim,
                                         context_dim,
                                         'Wch',
                                         orthogonal_init=True)

        # bias
        self.bh = util.shared(util.zeros((hidden_dim, )), 'bh')
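
These parameters correspond to a plain (non-gated) recurrent step with an optional additive context term. A minimal numpy sketch of one step is below; the tanh nonlinearity and the function name (rnn_step) are assumptions, since the class's actual step function is not shown here.

import numpy as np

def rnn_step(x_t, h_prev, Uh, Wh, bh, Whc=None, context=None):
    # standard recurrent update: hidden -> hidden plus embedded input -> hidden
    pre_activation = np.dot(Uh, h_prev) + np.dot(Wh, x_t) + bh
    # add the context contribution only when a context vector is provided
    if context is not None:
        pre_activation += np.dot(Whc, context)
    return np.tanh(pre_activation)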