Example #1
    def __init__(self,
                 vocab_size,
                 embedding_dim,
                 initial_embeddings_file=None,
                 train_embeddings=True):
        if not train_embeddings and initial_embeddings_file is None:
            sys.stderr.write("WARNING: embeddings will not be trained and no"
                             " initial embeddings were given\n")
        self.train_embeddings = train_embeddings
        if initial_embeddings_file:
            e = np.load(initial_embeddings_file)
            assert e.shape[0] == vocab_size, \
                "vocab size mismatch? loaded=%s expected=%s" % (e.shape[0], vocab_size)
            # TODO: the code could handle this; we just don't want
            # --embedding-dim set when using initial embeddings
            assert e.shape[1] == embedding_dim, \
                "dimensionality config error: loaded embeddings are %sd but" \
                " --embedding-dim is set to %sd" % (e.shape[1], embedding_dim)
            assert e.dtype == np.float32, "%s" % e.dtype
            self.shared_embeddings = util.shared(e, 'tied_embeddings')
        else:
            self.shared_embeddings = util.sharedMatrix(vocab_size,
                                                       embedding_dim,
                                                       'tied_embeddings',
                                                       orthogonal_init=True)
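
All of these examples lean on a small util module that this page doesn't show. As a rough sketch only, the calls above are consistent with helpers along these lines (names and signatures are inferred from usage; the real module may differ, and it evidently also provides zeros() and sharedVector()):

    import numpy as np
    import theano

    def shared(values, name):
        # wrap a numpy array as a float32 Theano shared variable
        return theano.shared(np.asarray(values, dtype=np.float32),
                             name=name, borrow=True)

    def sharedMatrix(n_rows, n_cols, name, orthogonal_init=True):
        w = np.random.randn(n_rows, n_cols)
        if orthogonal_init:
            # take the orthonormal factor of a random Gaussian matrix via SVD
            u, _, v = np.linalg.svd(w, full_matrices=False)
            w = u if u.shape == (n_rows, n_cols) else v
        return shared(w, name)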
Example #2
    def __init__(self, n_in, n_embedding, n_hidden, orthogonal_init):
        # input embeddings
        self.Wx = util.sharedMatrix(n_in, n_embedding, 'Wx', orthogonal_init)
        # input-side gate weights (z and r)
        self.Wz = util.sharedMatrix(n_in, n_hidden, 'Wz', orthogonal_init)
        self.Wr = util.sharedMatrix(n_in, n_hidden, 'Wr', orthogonal_init)
        # recurrent weights for the candidate state and the gates
        self.Ux = util.sharedMatrix(n_hidden, n_embedding, 'Ux', orthogonal_init)
        self.Uz = util.sharedMatrix(n_hidden, n_hidden, 'Uz', orthogonal_init)
        self.Ur = util.sharedMatrix(n_hidden, n_hidden, 'Ur', orthogonal_init)
        # final projection back to the vocab
        self.Wy = util.sharedMatrix(n_in, n_hidden, 'Wy', orthogonal_init)
Example #3
    def __init__(self,
                 name,
                 input_dim,
                 hidden_dim,
                 opts,
                 update_fn,
                 h0,
                 inputs,
                 context=None,
                 context_dim=None):
        self.name_ = name
        self.update_fn = update_fn
        self.h0 = h0
        self.inputs = inputs  # input sequence
        self.context = context  # additional context to add at each timestep of input

        # params for standard recurrent step
        self.Uh = util.sharedMatrix(hidden_dim,
                                    hidden_dim,
                                    'Uh',
                                    orthogonal_init=True)
        self.Wh = util.sharedMatrix(hidden_dim,
                                    input_dim,
                                    'Wh',
                                    orthogonal_init=True)
        self.bh = util.shared(util.zeros((hidden_dim, )), 'bh')

        # params for context; if applicable
        if self.context is not None:
            self.Wch = util.sharedMatrix(hidden_dim,
                                         context_dim,
                                         'Wch',
                                         orthogonal_init=True)

        # params for reset gate; initial bias to not reset
        self.Ur = util.sharedMatrix(hidden_dim,
                                    hidden_dim,
                                    'Ur',
                                    orthogonal_init=True)
        self.Wr = util.sharedMatrix(hidden_dim,
                                    input_dim,
                                    'Wr',
                                    orthogonal_init=True)
        self.br = util.shared(np.asarray([opts.gru_initial_bias] * hidden_dim),
                              'br')

        # params for carry gate; initial bias to never carry h_t_minus_1
        self.Uz = util.sharedMatrix(hidden_dim,
                                    hidden_dim,
                                    'Uz',
                                    orthogonal_init=True)
        self.Wz = util.sharedMatrix(hidden_dim,
                                    input_dim,
                                    'Wz',
                                    orthogonal_init=True)
        self.bz = util.shared(np.asarray([opts.gru_initial_bias] * hidden_dim),
                              'bz')
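
The parameter names and comments above describe a GRU-style cell: Ur/Wr/br gate the reset, Uz/Wz/bz gate the carry, and Wch folds in the optional context. Purely as a hedged sketch (this is not the repo's actual step function; x_t, h_tm1, and the theano.tensor import as T are assumed), the step these parameters imply looks like:

    def step(x_t, h_tm1):
        # reset gate over the previous hidden state
        r = T.nnet.sigmoid(T.dot(self.Wr, x_t) + T.dot(self.Ur, h_tm1) + self.br)
        # carry gate; per the comments above, z decides how much of h_tm1 to carry
        z = T.nnet.sigmoid(T.dot(self.Wz, x_t) + T.dot(self.Uz, h_tm1) + self.bz)
        # candidate state; a T.dot(self.Wch, context) term would be added here
        # when context is given
        h_candidate = T.tanh(T.dot(self.Wh, x_t) + T.dot(self.Uh, r * h_tm1) + self.bh)
        # interpolate between the carried state and the candidate
        return z * h_tm1 + (1 - z) * h_candidate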
Example #4
    def __init__(self, name, input_dim, hidden_dim, opts, update_fn, h0, inputs,
                 context=None, context_dim=None):
        self.name_ = name
        self.update_fn = update_fn
        self.h0 = h0
        self.inputs = inputs    # input sequence
        self.context = context  # additional context to add at each timestep of input

        # hidden -> hidden
        self.Uh = util.sharedMatrix(hidden_dim, hidden_dim, 'Uh', orthogonal_init=True)

        # embedded input -> hidden
        self.Wh = util.sharedMatrix(hidden_dim, input_dim, 'Wh', orthogonal_init=True)

        # context -> hidden (if applicable)
        if self.context is not None:
            self.Whc = util.sharedMatrix(hidden_dim, context_dim, 'Wch',
                                         orthogonal_init=True)

        # bias
        self.bh = util.shared(util.zeros((hidden_dim,)), 'bh')
Example #5
    def __init__(self, inp, n_labels, n_hidden_previous, update_fn,
                 training=None, keep_prob=None):
        if isinstance(inp, list):
            self.input = T.concatenate(inp)
            input_size = len(inp) * n_hidden_previous
        else:
            self.input = inp
            input_size = n_hidden_previous

        if training is not None:
            assert keep_prob is not None
            self.input = dropout(self.input, training, keep_prob)

        self.update_fn = update_fn

        # input -> hidden (sized somewhere between size of input & softmax)
        n_hidden = int(math.sqrt(input_size * n_labels))
        print("concat sizing %s -> %s -> %s" % (input_size, n_hidden, n_labels))
        self.Wih = util.sharedMatrix(input_size, n_hidden, 'Wih')
        self.bh = util.shared(util.zeros((1, n_hidden)), 'bh')
        # hidden -> softmax
        self.Whs = util.sharedMatrix(n_hidden, n_labels, 'Whs')
        self.bs = util.shared(util.zeros((1, n_labels)), 'bs')
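
Here n_hidden is sized as the geometric mean of the input width and the label count, a common rule of thumb for a bottleneck layer. A minimal sketch of the forward pass these four parameters imply (the tanh nonlinearity is an assumption; the snippet itself doesn't show it):

    h = T.tanh(T.dot(self.input, self.Wih) + self.bh)
    p_y_given_x = T.nnet.softmax(T.dot(h, self.Whs) + self.bs)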
Example #6
    def __init__(self, n_in, n_embedding, n_hidden, orthogonal_init):
        # forward pass
        self.Wx_f = util.sharedMatrix(n_in, n_embedding, 'Wx_f', orthogonal_init)
        self.Wrec_f = util.sharedMatrix(n_hidden, n_embedding, 'Wrec_f', orthogonal_init)
        self.Wy_f = util.sharedMatrix(n_in, n_hidden, 'Wy_f', orthogonal_init)
        # backwards pass
        self.Wx_b = util.sharedMatrix(n_in, n_embedding, 'Wx_b', orthogonal_init)
        self.Wrec_b = util.sharedMatrix(n_hidden, n_embedding, 'Wrec_b', orthogonal_init)
        self.Wy_b = util.sharedMatrix(n_in, n_hidden, 'Wy_b', orthogonal_init)
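
Every weight here comes in an _f/_b pair, one set per direction over the sequence. As a sketch only (step_fn, embedded_seq, and h0 are assumed names, not from the snippet), a bidirectional pass in Theano is typically two scans, the second over the reversed sequence:

    # forward pass over the sequence as given
    h_f, _ = theano.scan(step_fn, sequences=[embedded_seq],
                         outputs_info=[h0], non_sequences=[self.Wrec_f])
    # backward pass over the same sequence, reversed
    h_b, _ = theano.scan(step_fn, sequences=[embedded_seq[::-1]],
                         outputs_info=[h0], non_sequences=[self.Wrec_b])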
Example #7
    def __init__(self, vocab_size, embedding_dim,
                 idxs=None, sequence_embeddings=None):
        assert (idxs is None) ^ (sequence_embeddings is None)
        #self.name = name

        if idxs is not None:
            # not tying weights, build our own set of embeddings
            self.Wx = util.sharedMatrix(vocab_size, embedding_dim, 'Wx',
                                        orthogonal_init=True)
            self.sequence_embeddings = self.Wx[idxs]
            self.using_shared_embeddings = False
        else:
            # using tied weights, we won't be handling the update
            self.sequence_embeddings = sequence_embeddings
            self.using_shared_embeddings = True
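
The xor-style assert enforces exactly one of the two construction modes. A hypothetical call site (the class name and variables below are placeholders, not from the repo):

    # mode 1: the layer owns its embedding matrix and slices it by token index
    enc = Embeddings(vocab_size=10000, embedding_dim=100, idxs=token_idxs)

    # mode 2: tied weights; the caller slices one shared matrix and passes the
    # slice in, so whoever owns that matrix handles the single update
    enc = Embeddings(vocab_size=10000, embedding_dim=100,
                     sequence_embeddings=shared_embeddings[token_idxs])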
Example #8
    def __init__(self, n_in, n_embedding, n_hidden, orthogonal_init):
        # for trivial annotation network; both _f (forward) and _b (backwards)
        self.Wx_a_f = util.sharedMatrix(n_in, n_embedding, 'Wx_a_f',
                                        orthogonal_init)  # embeddings for annotations
        self.Whx_f = util.sharedMatrix(n_hidden, n_embedding, 'Whx_f',
                                       orthogonal_init)
        self.Wx_a_b = util.sharedMatrix(n_in, n_embedding, 'Wx_a_b',
                                        orthogonal_init)  # embeddings for annotations
        self.Whx_b = util.sharedMatrix(n_hidden, n_embedding, 'Whx_b',
                                       orthogonal_init)
        # for attention network
        self.Wx_g = util.sharedMatrix(n_in, n_embedding, 'Wx_g',
                                      orthogonal_init)  # embeddings for glimpses
        self.Wug = util.sharedMatrix(n_hidden, n_embedding, 'Wug', orthogonal_init)
        self.Wag = util.sharedMatrix(n_hidden, n_hidden, 'Wag', orthogonal_init)
        self.wgs = util.sharedVector(n_hidden, 'Wgs')
        # final mapping to y
        self.Wy = util.sharedMatrix(n_in, n_hidden, 'Wy', orthogonal_init)
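
The shapes here suggest additive (Bahdanau-style) attention: Wug projects a glimpse embedding, Wag projects each annotation, and wgs turns the combined tanh into one scalar score per position. A hedged sketch (u, an embedded glimpse, and annotations, one row per token, are assumed inputs; the repo's actual wiring may differ):

    # one unnormalised score per annotation row
    scores = T.dot(T.tanh(T.dot(self.Wug, u) + T.dot(annotations, self.Wag.T)),
                   self.wgs)
    # softmax over the sequence, written out for a 1-d vector
    exp_scores = T.exp(scores - T.max(scores))
    alphas = exp_scores / T.sum(exp_scores)
    # the glimpse is the attention-weighted sum of the annotations
    glimpse = T.dot(alphas, annotations)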
Example #9
    def __init__(self, n_in, n_embedding, n_hidden, orthogonal_init):
        # input embeddings
        self.Wx = util.sharedMatrix(n_in, n_embedding, 'Wx', orthogonal_init)
        # recurrent weights
        self.Wrec = util.sharedMatrix(n_hidden, n_embedding, 'Wrec', orthogonal_init)
        # projection back to the vocab
        self.Wy = util.sharedMatrix(n_in, n_hidden, 'Wy', orthogonal_init)
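
This snippet only declares parameters, but the shapes are suggestive: Wx is an embedding matrix indexed by token id, Wrec maps an embedding into the hidden space, and Wy maps a hidden state back to vocab-sized logits. One plausible step, offered purely as an assumption since the recurrence itself isn't shown:

    e_t = self.Wx[token_idx]                     # embedding lookup, (n_embedding,)
    h_t = T.tanh(T.dot(self.Wrec, e_t) + h_tm1)  # assumed recurrence; not shown
                                                 # in the snippet itself
    y_t = T.dot(self.Wy, h_t)                    # logits over the vocab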