Example #1
    def __init__(self,
                 input_size=INPUT_SIZE,
                 output_size=OUTPUT_SIZE,
                 hidden_size=HIDDEN_SIZE,
                 embed_size=EMBED_SIZE,
                 lr=LEARNING_RATE,
                 clip_grad=CLIP_GRAD,
                 init_range=INIT_RANGE):
        # this model will generate a vector representation based on the input
        input_layers = [
            Embedding(input_size, embed_size, init_range),
            Lstm(embed_size, hidden_size, init_range),
        ]

        # this model will generate an output sequence based on the hidden vector
        output_layers = [
            Embedding(output_size, embed_size, init_range),
            Lstm(embed_size, hidden_size, init_range,
                 previous=input_layers[1]),
            Softmax(hidden_size, output_size, init_range)
        ]

        self.input_layers, self.output_layers = input_layers, output_layers
        self.hidden_size = hidden_size
        self.embed_size = embed_size
        self.input_size = input_size
        self.output_size = output_size
        self.lr = lr
        self.clip_grad = clip_grad
Example #2
    def __init__(self, FLAGS):

        self.embeddingLayers = Embedding(FLAGS.vocab_size, FLAGS.embedding_dim)
        self.cnnGLUBlock = CnnGLUBlock(dropout_rate=FLAGS.dropout_rate,
                                       is_batch_norm=FLAGS.is_batch_norm,
                                       is_training=FLAGS.is_training,
                                       pad_format=FLAGS.pad_format)
Example #3
    def __init__(self, name='ra', nimg=2048, nnh=512, na=512, nh=512, nw=512, nout=8843, npatch=30, model_file=None):
        self.name = name
        if model_file is not None:
            with h5py.File(model_file, 'r') as f:
                nimg = f.attrs['nimg']
                nnh = f.attrs['nnh']
                na = f.attrs['na']
                nh = f.attrs['nh']
                nw = f.attrs['nw']
                nout = f.attrs['nout']
                # npatch = f.attrs['npatch']
        self.config = {'nimg': nimg, 'nnh': nnh, 'na': na, 'nh': nh, 'nw': nw, 'nout': nout, 'npatch': npatch}

        # word embedding layer
        self.embedding = Embedding(n_emb=nout, dim_emb=nw, name=self.name+'@embedding')

        # initialization mlp layer
        self.init_mlp = MLP(layer_sizes=[na, 2*nh], output_type='tanh', name=self.name+'@init_mlp')
        self.proj_mlp = MLP(layer_sizes=[nimg, na], output_type='tanh', name=self.name+'@proj_mlp')

        # lstm
        self.lstm = BasicLSTM(dim_x=na+nw, dim_h=nh, name=self.name+'@lstm')

        # prediction mlp
        self.pred_mlp = MLP(layer_sizes=[na+nh+nw, nout], output_type='softmax', name=self.name+'@pred_mlp')

        # attention layer
        self.attention = Attention(dim_item=na, dim_context=na+nw+nh, hsize=nnh, name=self.name+'@attention')

        # inputs
        cap = T.imatrix('cap')
        img = T.tensor3('img')
        self.inputs = [cap, img]

        # go through sequence
        feat = self.proj_mlp.compute(img)
        init_e = feat.mean(axis=1)
        init_state = T.concatenate([init_e, self.init_mlp.compute(init_e)], axis=-1)
        (state, self.p, loss, self.alpha), _ = theano.scan(fn=self.scan_func,
                                                           sequences=[cap[0:-1, :], cap[1:, :]],
                                                           outputs_info=[init_state, None, None, None],
                                                           non_sequences=[feat])

        # loss function
        loss = T.mean(loss)
        self.costs = [loss]

        # layers and parameters
        self.layers = [self.embedding, self.init_mlp, self.proj_mlp, self.attention, self.lstm, self.pred_mlp]
        self.params = sum([l.params for l in self.layers], [])

        # load weights from file, if model_file is not None
        if model_file is not None:
            self.load_weights(model_file)

        # these functions and variables are used in test stage
        self._init_func = None
        self._step_func = None
        self._proj_func = None
        self._feat_shared = theano.shared(np.zeros((1, npatch, nimg)).astype(theano.config.floatX))
Example #4
    def __init__(self, vocab_size, embedding_size, hidden_size):
        self.vocab_size = vocab_size
        self.embedding_size = embedding_size
        self.hidden_size = hidden_size

        self.embedding = Embedding(vocab_size, embedding_size)
        self.lstm = LSTM(embedding_size, hidden_size)
        self.layers = [self.embedding, self.lstm]
        self.params = list(
            itertools.chain(*[
                layer.params for layer in self.layers
                if hasattr(layer, 'params')
            ]))
Example #5
    def forward(self, xs: np.ndarray) -> np.ndarray:
        N, T = xs.shape
        V, D = self.W.shape

        out = np.empty((N, T, D), dtype=float)
        self.layers = []

        for t in range(T):
            layer = Embedding(self.W)
            out[:, t, :] = layer.forward(xs[:, t])
            self.layers.append(layer)

        return out
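Examples 5 and 7 apply a fresh `Embedding` layer at each timestep, which only works if the layer is cheap and stateless apart from its shared weight matrix. Below is a minimal sketch of such a per-timestep `Embedding`; the `params`/`grads` attribute names follow the convention used in Examples 8 and 9, and the `backward` method is an assumption, not shown in the snippets above:

import numpy as np

class Embedding:
    # Row-lookup embedding: forward picks rows of W, backward scatter-adds.
    def __init__(self, W):
        self.params = [W]
        self.grads = [np.zeros_like(W)]
        self.idx = None

    def forward(self, idx):
        W, = self.params
        self.idx = idx
        return W[idx]  # (N,) int indices -> (N, D) vectors

    def backward(self, dout):
        dW, = self.grads
        dW[...] = 0
        np.add.at(dW, self.idx, dout)  # repeated indices must accumulate
        return None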
Example #6
	def __init__(self, num_layers, d_model, num_heads, dff, vocab_size, batch_size, rate=0.1, use_stats=False):
		super(Transformer, self).__init__()

		self.num_layers = num_layers
		self.vocab_size = vocab_size
		self.batch_size = batch_size
		self.model_depth = d_model
		self.num_heads = num_heads

		self.embedding = Embedding(vocab_size, d_model)
		self.encoder = Encoder(num_layers, d_model, num_heads, dff, vocab_size, rate)
		self.decoder = Decoder(num_layers, d_model, num_heads, dff, vocab_size, rate, use_stats)
		self.final_layer = tf.keras.layers.Dense(vocab_size)
Example #7
    def forward(self, idxs):
        w, = self.params
        N, T = idxs.shape
        V, D = w.shape  # vocabulary size, embedding dimension

        self.layers = []
        ys = np.empty((N, T, D), dtype='f')

        for t in range(T):
            layer = Embedding(w)
            ys[:, t, :] = layer.forward(idxs[:, t])
            self.layers.append(layer)

        return ys
Example #8
    def __init__(self, vocab_size: int, hidden_size: int, window_size: int, corpus: List[int]) -> None:
        W_in = 0.01 * np.random.randn(vocab_size, hidden_size).astype(float)
        W_out = 0.01 * np.random.randn(vocab_size, hidden_size).astype(float)

        self.in_layers = []
        for i in range(2 * window_size):
            layer = Embedding(W_in)
            self.in_layers.append(layer)
        self.ns_loss = NegativeSamplingLoss(W_out, corpus, power=0.75, sample_size=5)

        layers = self.in_layers + [self.ns_loss]
        self.params, self.grads = [], []
        for layer in layers:
            self.params += layer.params
            self.grads += layer.grads
        
        self.word_vecs = W_in
Example #9
    def __init__(self, vocab_size, hidden_size, window_size, corpus):
        # weights
        w_in = 0.01 * np.random.randn(vocab_size, hidden_size).astype('f')
        w_out = 0.01 * np.random.randn(vocab_size, hidden_size).astype('f')

        # layers
        self.embed_layers = [Embedding(w_in) for _ in range(2 * window_size)]
        self.ns_loss_layer = NegativeSamplingLoss(w_out, corpus)

        # collect weights and gradients
        layers = self.embed_layers + [self.ns_loss_layer]
        self.params, self.grads = [], []
        for l in layers:
            self.params += l.params
            self.grads += l.grads

        # distributed word representations
        self.word_vecs = w_in
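For completeness, here is a hedged sketch of the forward/backward pass that usually accompanies this CBOW-with-negative-sampling setup; the method bodies are assumptions consistent with the layer APIs above, and `contexts` is assumed to have shape `(N, 2 * window_size)`:

    def forward(self, contexts, target):
        # average the context-word vectors from all embedding layers
        h = 0
        for i, layer in enumerate(self.embed_layers):
            h += layer.forward(contexts[:, i])
        h *= 1 / len(self.embed_layers)
        return self.ns_loss_layer.forward(h, target)

    def backward(self, dout=1):
        dout = self.ns_loss_layer.backward(dout)
        dout *= 1 / len(self.embed_layers)
        for layer in self.embed_layers:
            layer.backward(dout)
        return None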
Example #10
def create_output_node(model=None, input_sequences=None, num_gru=None, old_h0s=None, reset=None, num_pixelCNN_layer=None):
	assert(model is not None)
	assert(input_sequences is not None)
	assert(num_gru is not None)
	assert(old_h0s is not None)
	assert(reset is not None)
	assert(num_pixelCNN_layer is not None)

	new_h0s = T.zeros_like(old_h0s)
	h0s = theano.ifelse.ifelse(reset, new_h0s, old_h0s)

	embedding_layer = Embedding(Q_LEVELS, DIM, input_sequences, name=model.name + "Embedding.Q_LEVELS")
	model.add_layer(embedding_layer)

	prev_out = embedding_layer.output()
	last_layer = WrapperLayer(prev_out.reshape((prev_out.shape[0], prev_out.shape[1], WIDTH, DEPTH)))

	pixel_CNN = pixelConv(
		last_layer,
		DEPTH,
		DEPTH,
		name=model.name + ".pxCNN",
		num_layers=num_pixelCNN_layer  # use the argument asserted above instead of the global NUM_PIXEL_CNN_LAYER
	)

	prev_out = pixel_CNN.output()
	last_layer = WrapperLayer(prev_out.reshape((prev_out.shape[0], prev_out.shape[1], -1)))

	last_hidden_list = []

	for i in range(num_gru):
		gru_layer = GRU(DIM, DIM, last_layer, s0=h0s[i, :, :], name=model.name + "GRU_{}".format(i))
		last_hidden_list.append(gru_layer.output()[:, -1])
		model.add_layer(gru_layer)
		last_layer = gru_layer

	fc1 = FC(DIM, Q_LEVELS, last_layer, name=model.name + "FullyConnected")
	model.add_layer(fc1)

	softmax = Softmax(fc1, name=model.name + "Softmax")
	model.add_layer(softmax)

	return softmax.output(), T.stack(last_hidden_list, axis=0)
Example #11
    def __init__(self, config):
        super(AESModel, self).__init__()
        self.config = config
        self.e0 = Embedding(config.vocab_size, config.embedding_output, config)
        self.m0 = Modeling(config.embedding_output, config.hidden_size, config)
        self.a0 = Attn(2 * config.hidden_size,
                       2 * config.hidden_size,
                       config.max_length_sent,
                       config,
                       dropout_p=config.dropout)
        self.m1 = Modeling(4 * config.hidden_size, config.hidden_size, config)
        self.a1 = Attn(2 * config.hidden_size,
                       2 * config.hidden_size,
                       config.max_length_sent,
                       config,
                       dropout_p=config.dropout)
        self.m2 = Modeling(4 * config.hidden_size, config.hidden_size, config)
        # self.m2 = Modeling(config.hidden_size, config.hidden_size, config)
        self.o0 = Output(
            2 * config.hidden_size * config.max_length_sent *
            config.max_num_sent, config)
Example #12
 def __init__( self
             , word_vocab_size
             , word_emb_dim
             , field_vocab_size
             , field_emb_dim
             , pos_vocab_size
             , pos_emb_dim
             , fglstm_dim):
     super(Encoder, self).__init__()
     self.embedding_layer = Embedding( word_vocab_size
                                     , word_emb_dim
                                     , field_vocab_size
                                     , field_emb_dim
                                     , pos_vocab_size
                                     , pos_emb_dim)
     self._field_pos_emb_dim = self.embedding_layer.get_output_shape()[1][2]
     self.cell = FieldGatingLSTMCell( fglstm_dim
                                    , word_emb_dim
                                    , self._field_pos_emb_dim)
     self.rnn = tf.keras.layers.RNN( self.cell
                                   , return_sequences=True
                                   , return_state=True)
Example #13
    def _def_layers(self):

        # word embeddings
        self.word_embedding = Embedding(embedding_size=self.embedding_size,
                                        vocab_size=self.word_vocab_size,
                                        enable_cuda=self.enable_cuda)

        # lstm encoder
        self.encoder = FastUniLSTM(
            ninp=self.embedding_size,
            nhids=self.encoder_rnn_hidden_size,
            dropout_between_rnn_layers=self.dropout_between_rnn_layers)

        self.action_scorer_shared = torch.nn.Linear(
            self.encoder_rnn_hidden_size[-1], self.action_scorer_hidden_dim)
        action_scorers = []
        for _ in range(self.generate_length):
            action_scorers.append(
                torch.nn.Linear(self.action_scorer_hidden_dim,
                                self.word_vocab_size,
                                bias=False))
        self.action_scorers = torch.nn.ModuleList(action_scorers)
        self.fake_recurrent_mask = None
Example #14
    def __init__(self, vocab_size, embedding_size, hidden_size, output_size):
        self.vocab_size = vocab_size
        self.embedding_size = embedding_size
        self.hidden_size = hidden_size
        self.output_size = output_size

        self.lstm = LSTM(embedding_size, hidden_size)
        self.lstm_output = TimeDistributed(hidden_size,
                                           output_size,
                                           activation='tanh')
        self.softmax = TimeDistributed(output_size,
                                       vocab_size,
                                       activation='softmax')
        self.embedding = Embedding(vocab_size, embedding_size)

        self.layers = [
            self.lstm, self.lstm_output, self.softmax, self.embedding
        ]
        self.params = list(
            itertools.chain(*[
                layer.params for layer in self.layers
                if hasattr(layer, 'params')
            ]))
Example #15
    def __init__(self, w):
        self.embed = Embedding(w)
        self.params = self.embed.params
        self.grads = self.embed.grads
        self.cache = None
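This fragment follows the `EmbeddingDot` pattern used inside negative-sampling losses: it wraps an `Embedding` and caches intermediates for the backward pass. A hedged sketch of the methods that typically complete it (shapes and method bodies are assumptions, not taken from the snippet):

    def forward(self, h, idx):
        target_w = self.embed.forward(idx)  # (N, D) rows for the sampled ids
        out = np.sum(target_w * h, axis=1)  # (N,) dot-product scores
        self.cache = (h, target_w)
        return out

    def backward(self, dout):
        h, target_w = self.cache
        dout = dout.reshape(dout.shape[0], 1)
        self.embed.backward(dout * h)  # gradient w.r.t. the picked embedding rows
        return dout * target_w         # gradient w.r.t. h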
Example #16
    def _def_layers(self):

        # word embeddings
        if self.use_pretrained_embedding:
            self.word_embedding = Embedding(embedding_size=self.word_embedding_size,
                                            vocab_size=self.word_vocab_size,
                                            id2word=self.word_vocab,
                                            dropout_rate=self.embedding_dropout,
                                            load_pretrained=True,
                                            trainable=self.word_embedding_trainable,
                                            embedding_oov_init="random",
                                            pretrained_embedding_path=self.pretrained_embedding_path)
        else:
            self.word_embedding = Embedding(embedding_size=self.word_embedding_size,
                                            vocab_size=self.word_vocab_size,
                                            trainable=self.word_embedding_trainable,
                                            dropout_rate=self.embedding_dropout)

        # node embeddings
        self.node_embedding = Embedding(embedding_size=self.node_embedding_size,
                                        vocab_size=self.node_vocab_size,
                                        trainable=self.node_embedding_trainable,
                                        dropout_rate=self.embedding_dropout)

        # relation embeddings
        self.relation_embedding = Embedding(embedding_size=self.relation_embedding_size,
                                            vocab_size=self.relation_vocab_size,
                                            trainable=self.relation_embedding_trainable,
                                            dropout_rate=self.embedding_dropout)

        self.word_embedding_prj = torch.nn.Linear(self.word_embedding_size, self.block_hidden_dim, bias=False)
        self.encoder = torch.nn.ModuleList([EncoderBlock(conv_num=self.encoder_conv_num, ch_num=self.block_hidden_dim, k=5, block_hidden_dim=self.block_hidden_dim, n_head=self.n_heads, dropout=self.block_dropout) for _ in range(self.encoder_layers)])

        self.rgcns = StackedRelationalGraphConvolution(
            entity_input_dim=self.node_embedding_size + self.block_hidden_dim,
            relation_input_dim=self.relation_embedding_size + self.block_hidden_dim,
            num_relations=self.relation_vocab_size,
            hidden_dims=self.gcn_hidden_dims,
            num_bases=self.gcn_num_bases,
            use_highway_connections=self.gcn_highway_connections,
            dropout_rate=self.dropout,
            real_valued_graph=self.real_valued_graph)
        self.attention = CQAttention(block_hidden_dim=self.block_hidden_dim, dropout=self.attention_dropout)
        self.attention_prj = torch.nn.Linear(self.block_hidden_dim * 4, self.block_hidden_dim, bias=False)

        self.self_attention_text = SelfAttention(self.block_hidden_dim, self.n_heads, self.dropout)
        self.self_attention_graph = SelfAttention(self.block_hidden_dim, self.n_heads, self.dropout)

        # recurrent memories
        self.recurrent_memory_bi_input = LSTMCell(self.block_hidden_dim * 2, self.block_hidden_dim, use_bias=True)
        self.recurrent_memory_single_input = LSTMCell(self.block_hidden_dim, self.block_hidden_dim, use_bias=True)

        linear_function = NoisyLinear if self.noisy_net else torch.nn.Linear
        self.action_scorer_linear_1_tri_input = linear_function(self.block_hidden_dim * 3, self.block_hidden_dim)
        self.action_scorer_linear_1_bi_input = linear_function(self.block_hidden_dim * 2, self.block_hidden_dim)
        self.action_scorer_linear_2 = linear_function(self.block_hidden_dim, 1)

        # text encoder for pretraining tasks
        # (we separate this because we don't want to init text encoder with pretrained parameters when training RL)
        self.encoder_for_pretraining_tasks = torch.nn.ModuleList([EncoderBlock(conv_num=self.encoder_conv_num, ch_num=self.block_hidden_dim, k=5, block_hidden_dim=self.block_hidden_dim, n_head=self.n_heads, dropout=self.block_dropout) for _ in range(self.encoder_layers)])

        # command generation
        self.cmd_gen_attention = CQAttention(block_hidden_dim=self.block_hidden_dim, dropout=self.attention_dropout)
        self.cmd_gen_attention_prj = torch.nn.Linear(self.block_hidden_dim * 4, self.block_hidden_dim, bias=False)
        self.decoder = torch.nn.ModuleList([DecoderBlock(ch_num=self.block_hidden_dim, k=5, block_hidden_dim=self.block_hidden_dim, n_head=self.n_heads, dropout=self.block_dropout) for _ in range(self.decoder_layers)])
        self.tgt_word_prj = torch.nn.Linear(self.block_hidden_dim, self.word_vocab_size, bias=False)
        self.pointer_softmax = PointerSoftmax(input_dim=self.block_hidden_dim, hidden_dim=self.block_hidden_dim)

        # observation generation
        self.obs_gen_attention = CQAttention(block_hidden_dim=self.block_hidden_dim, dropout=self.attention_dropout)
        self.obs_gen_attention_prj = torch.nn.Linear(self.block_hidden_dim * 4, self.block_hidden_dim, bias=False)
        self.obs_gen_decoder = torch.nn.ModuleList([DecoderBlockForObsGen(ch_num=self.block_hidden_dim, k=5, block_hidden_dim=self.block_hidden_dim, n_head=self.n_heads, dropout=self.block_dropout) for _ in range(self.decoder_layers)])
        self.obs_gen_tgt_word_prj = torch.nn.Linear(self.block_hidden_dim, self.word_vocab_size, bias=False)
        self.obs_gen_linear_1 = torch.nn.Linear(self.block_hidden_dim, self.block_hidden_dim)
        self.obs_gen_linear_2 = torch.nn.Linear(self.block_hidden_dim, int(len(self.relation_vocab) / 2) * len(self.node_vocab) * len(self.node_vocab))
        self.obs_gen_attention_to_rnn_input = torch.nn.Linear(self.block_hidden_dim * 4, self.block_hidden_dim)
        self.obs_gen_graph_rnncell = torch.nn.GRUCell(self.block_hidden_dim, self.block_hidden_dim)
        self.observation_discriminator = ObservationDiscriminator(self.block_hidden_dim)

        # action prediction
        self.ap_attention = CQAttention(block_hidden_dim=self.block_hidden_dim, dropout=self.attention_dropout)
        self.ap_attention_prj = torch.nn.Linear(self.block_hidden_dim * 4, self.block_hidden_dim, bias=False)
        self.ap_self_attention = SelfAttention(self.block_hidden_dim * 3, self.n_heads, self.dropout)
        self.ap_linear_1 = torch.nn.Linear(self.block_hidden_dim * 3, self.block_hidden_dim)
        self.ap_linear_2 = torch.nn.Linear(self.block_hidden_dim, 1)

        # state prediction
        self.sp_attention = CQAttention(block_hidden_dim=self.block_hidden_dim, dropout=self.attention_dropout)
        self.sp_attention_prj = torch.nn.Linear(self.block_hidden_dim * 4, self.block_hidden_dim, bias=False)
        self.sp_self_attention = SelfAttention(self.block_hidden_dim * 3, self.n_heads, self.dropout)
        self.sp_linear_1 = torch.nn.Linear(self.block_hidden_dim * 3, self.block_hidden_dim)
        self.sp_linear_2 = torch.nn.Linear(self.block_hidden_dim, 1)

        # deep graph infomax
        self.dgi_discriminator = DGIDiscriminator(self.gcn_hidden_dims[-1])
Example #17
import numpy as np

from tensor import Tensor
from layers import Embedding
from rnn import RNNCell
from losses import CrossEntropyLoss
from optimizers import SGD

with open('data/shakespear.txt', 'r') as f:
    raw = f.read()

vocab = list(set(raw))
word2index = {}
for i, word in enumerate(vocab):
    word2index[word] = i
indices = np.array(list(map(lambda x: word2index[x], raw)))

embed = Embedding(vocab_size=len(vocab), dim=512)
model = RNNCell(n_inputs=512, n_hidden=512, n_output=len(vocab))

criterion = CrossEntropyLoss()
optim = SGD(parameters=model.get_parameters() + embed.get_parameters(),
            alpha=0.01)

batch_size = 32
bptt = 16
n_batches = indices.shape[0] // batch_size
trimmed_indices = indices[:n_batches * batch_size]
# batched_indices: after the transpose below, each column is a contiguous sub-sequence of indices
batched_indices = trimmed_indices.reshape(batch_size, n_batches)
batched_indices = batched_indices.transpose()

input_batched_indices = batched_indices[:-1]
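The snippet stops after building the inputs; a hedged sketch of how the matching targets and one truncated-BPTT epoch would typically continue (the `Tensor(..., autograd=True)`, `model.init_hidden`, and loss-summing conventions are assumptions in the style of this snippet's framework):

target_batched_indices = batched_indices[1:]  # next-character targets

n_bptt = (n_batches - 1) // bptt
input_batches = input_batched_indices[:n_bptt * bptt].reshape(n_bptt, bptt, batch_size)
target_batches = target_batched_indices[:n_bptt * bptt].reshape(n_bptt, bptt, batch_size)

for batch_i in range(n_bptt):
    hidden = model.init_hidden(batch_size=batch_size)
    loss = None
    for t in range(bptt):
        rnn_input = embed.forward(input=Tensor(input_batches[batch_i][t], autograd=True))
        output, hidden = model.forward(input=rnn_input, hidden=hidden)
        target = Tensor(target_batches[batch_i][t], autograd=True)
        step_loss = criterion.forward(output, target)
        loss = step_loss if t == 0 else loss + step_loss
    loss.backward()
    optim.step()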
Example #18
d_vocab_size = len(d_w2i)

x = tf.placeholder(tf.int32, [None, None], name='x')
m = tf.cast(tf.not_equal(x, -1), tf.float32)
t = tf.placeholder(tf.int32, [None, None], name='t')
t_in = t[:, :-1]
t_out = t[:, 1:]
t_out_one_hot = tf.one_hot(t_out, depth=d_vocab_size, dtype=tf.float32)

# Attention mask
ma = tf.where(condition=tf.equal(x, PADDING_ID),
              x=tf.ones_like(x, dtype=tf.float32) * np.float32(-1e+10),
              y=tf.ones_like(x, dtype=tf.float32))

encoder = [
    Embedding(e_vocab_size, EMB_DIM),
    GRU(EMB_DIM, HID_DIM, m),
    GRU(EMB_DIM, HID_DIM, m[:, ::-1])
]

x_emb = f_props(encoder[:1], x)
h_ef = f_props(encoder[1:2], x_emb)
h_eb = f_props(encoder[2:], x_emb[:, ::-1])[:, ::-1, :]
h_e = tf.concat([h_ef, h_eb], axis=2)
h_d1_0 = tf.reduce_mean(h_e, axis=1)
h_d2_0 = tf.reduce_mean(h_e, axis=1)

decoder = [
    Embedding(d_vocab_size, EMB_DIM),
    GRU(EMB_DIM, 2 * HID_DIM, tf.ones_like(t_in, dtype='float32'), h_0=h_d1_0),
    Attention(2 * HID_DIM, 2 * HID_DIM, h_e, ma),
Example #19
    def __init__(self, W: np.ndarray) -> None:
        self.embed = Embedding(W)
        self.params = self.embed.params
        self.grads = self.embed.grads
        self.cache = None
Example #20
import numpy as np
from numpy.random import randn
from random import randint

from layers import Lstm, Softmax, Embedding

DELTA = 1e-5
THRESHOLD = 1e-2

EOS = 0
HIDDEN_SIZE = 10

input_layers = [
    Embedding(5, 10),
    Lstm(10, 10),
]

output_layers = [
    Embedding(5, 10),
    Lstm(10, 10, previous=input_layers[1]),
    Softmax(10, 4),
]

X = [randint(0, 4), randint(0, 4)]
Y = [randint(0, 3), randint(0, 3)]


def train():
    # reset state
    for layer in input_layers:
        layer.initSequence()
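Given the `DELTA` and `THRESHOLD` constants at the top, this file is evidently a finite-difference gradient check for the encoder-decoder above. A hedged sketch of such a check (the `weights`/`gradients` arrays and `compute_loss` callback are assumed interfaces, not taken from the snippet):

def check_gradient(weights, gradients, compute_loss):
    # compare analytic gradients against central finite differences
    for i in range(weights.size):
        original = weights.flat[i]
        weights.flat[i] = original + DELTA
        loss_plus = compute_loss()
        weights.flat[i] = original - DELTA
        loss_minus = compute_loss()
        weights.flat[i] = original  # restore the weight

        numeric = (loss_plus - loss_minus) / (2 * DELTA)
        analytic = gradients.flat[i]
        denom = max(abs(numeric) + abs(analytic), 1e-12)
        assert abs(numeric - analytic) / denom < THRESHOLD, (i, numeric, analytic)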
Example #21
    def _def_layers(self):

        # word embeddings
        if self.use_pretrained_embedding:
            self.word_embedding = Embedding(
                embedding_size=self.word_embedding_size,
                vocab_size=self.word_vocab_size,
                id2word=self.word_vocab,
                dropout_rate=self.embedding_dropout,
                load_pretrained=True,
                trainable=self.word_embedding_trainable,
                embedding_oov_init="random",
                pretrained_embedding_path=self.pretrained_embedding_path)
        else:
            self.word_embedding = Embedding(
                embedding_size=self.word_embedding_size,
                vocab_size=self.word_vocab_size,
                trainable=self.word_embedding_trainable,
                dropout_rate=self.embedding_dropout)

        # char embeddings
        self.char_embedding = Embedding(
            embedding_size=self.char_embedding_size,
            vocab_size=self.char_vocab_size,
            trainable=self.char_embedding_trainable,
            dropout_rate=self.embedding_dropout)

        self.merge_embeddings = MergeEmbeddings(
            block_hidden_dim=self.block_hidden_dim,
            word_emb_dim=self.word_embedding_size,
            char_emb_dim=self.char_embedding_size,
            dropout=self.embedding_dropout)

        self.encoders = torch.nn.ModuleList([
            EncoderBlock(conv_num=self.encoder_conv_num,
                         ch_num=self.block_hidden_dim,
                         k=7,
                         block_hidden_dim=self.block_hidden_dim,
                         n_head=self.n_heads,
                         dropout=self.block_dropout)
            for _ in range(self.encoder_layers)
        ])

        self.context_question_attention = CQAttention(
            block_hidden_dim=self.block_hidden_dim,
            dropout=self.attention_dropout)

        self.context_question_attention_resizer = torch.nn.Linear(
            self.block_hidden_dim * 4, self.block_hidden_dim)

        self.aggregators = torch.nn.ModuleList([
            EncoderBlock(conv_num=self.aggregation_conv_num,
                         ch_num=self.block_hidden_dim,
                         k=5,
                         block_hidden_dim=self.block_hidden_dim,
                         n_head=self.n_heads,
                         dropout=self.block_dropout)
            for _ in range(self.aggregation_layers)
        ])

        linear_function = NoisyLinear if self.noisy_net else torch.nn.Linear
        self.action_scorer_shared_linear = linear_function(
            self.block_hidden_dim, self.action_scorer_hidden_dim)

        if self.use_distributional:
            if self.dueling_networks:
                action_scorer_output_size = self.atoms
                action_scorer_advantage_output_size = self.word_vocab_size * self.atoms
            else:
                action_scorer_output_size = self.word_vocab_size * self.atoms
        else:
            if self.dueling_networks:
                action_scorer_output_size = 1
                action_scorer_advantage_output_size = self.word_vocab_size
            else:
                action_scorer_output_size = self.word_vocab_size

        action_scorers = []
        for _ in range(self.generate_length):
            action_scorers.append(
                linear_function(self.action_scorer_hidden_dim,
                                action_scorer_output_size))
        self.action_scorers = torch.nn.ModuleList(action_scorers)

        if self.dueling_networks:
            action_scorers_advantage = []
            for _ in range(self.generate_length):
                action_scorers_advantage.append(
                    linear_function(self.action_scorer_hidden_dim,
                                    action_scorer_advantage_output_size))
            self.action_scorers_advantage = torch.nn.ModuleList(
                action_scorers_advantage)

        self.answer_pointer = AnswerPointer(
            block_hidden_dim=self.block_hidden_dim, noisy_net=self.noisy_net)

        if self.answer_type in ["2 way"]:
            self.question_answerer_output_1 = linear_function(
                self.block_hidden_dim, self.question_answerer_hidden_dim)
            self.question_answerer_output_2 = linear_function(
                self.question_answerer_hidden_dim, 2)
Example #22
    def __init__(self,
                 name='gnic',
                 nimg=2048,
                 nh=512,
                 nw=512,
                 nout=8843,
                 model_file=None):
        self.name = name
        if model_file is not None:
            with h5py.File(model_file, 'r') as f:
                nimg = f.attrs['nimg']
                nh = f.attrs['nh']
                nw = f.attrs['nw']
                nout = f.attrs['nout']
        self.config = {'nimg': nimg, 'nh': nh, 'nw': nw, 'nout': nout}

        # word embedding layer
        self.embedding = Embedding(n_emb=nout,
                                   dim_emb=nw,
                                   name=self.name + '@embedding')

        # initialization mlp layer
        self.proj_mlp = MLP(layer_sizes=[nimg, 2 * nh],
                            output_type='tanh',
                            name=self.name + '@proj_mlp')

        # lstm
        self.lstm = BasicLSTM(dim_x=nw, dim_h=nh, name=self.name + '@lstm')

        # prediction mlp
        self.pred_mlp = MLP(layer_sizes=[nh + nw, nout],
                            output_type='softmax',
                            name=self.name + '@pred_mlp')

        # inputs
        cap = T.imatrix('cap')
        img = T.matrix('img')
        self.inputs = [cap, img]

        # go through sequence
        init_state = self.proj_mlp.compute(img)
        (state, self.p,
         loss), _ = theano.scan(fn=self.scan_func,
                                sequences=[cap[0:-1, :], cap[1:, :]],
                                outputs_info=[init_state, None, None])

        # loss function
        loss = T.mean(loss)
        self.costs = [loss]

        # layers and parameters
        self.layers = [self.embedding, self.proj_mlp, self.lstm, self.pred_mlp]
        self.params = sum([l.params for l in self.layers], [])

        # load weights from file, if model_file is not None
        if model_file is not None:
            self.load_weights(model_file)

        # these functions are used in test stage
        self._init_func = None
        self._step_func = None
Example #23
    def __init__(self, args):
        super(QAxl, self).__init__()

        hidden_size = args['hidden_size']
        dropout = args['dropout']
        attention_size = args['attention_size']
        word_emb = np.array(read_json(args['data_dir'] + 'word_emb.json'),
                            dtype=np.float32)
        word_size = word_emb.shape[0]
        word_dim = word_emb.shape[1]
        char_dim = args['char_dim']
        char_len = len(read_json(args['data_dir'] + 'char2id.json'))
        pos_dim = args['pos_dim']
        ner_dim = args['ner_dim']

        self.args = args
        self.train_loss = AverageMeter()
        self.use_cuda = args['use_cuda']
        self.use_xl = args['use_xl']

        if self.use_xl:
            self.xl = TransfoXLModel.from_pretrained('transfo-xl-wt103')
            xl_dim = 1024

        ## Embedding Layer
        print('Building embedding...')
        self.word_embeddings = nn.Embedding(word_emb.shape[0],
                                            word_dim,
                                            padding_idx=0)
        self.word_embeddings.weight.data = torch.from_numpy(word_emb)
        self.char_embeddings = nn.Embedding(char_len, char_dim, padding_idx=0)
        self.pos_embeddings = nn.Embedding(args['pos_size'],
                                           args['pos_dim'],
                                           padding_idx=0)
        self.ner_embeddings = nn.Embedding(args['ner_size'],
                                           args['ner_dim'],
                                           padding_idx=0)
        with open(args['data_dir'] + 'tune_word_idx.pkl', 'rb') as f:
            tune_idx = pkl.load(f)
        self.fixed_idx = list(
            set([i for i in range(word_size)]) - set(tune_idx))
        fixed_embedding = torch.from_numpy(word_emb)[self.fixed_idx]
        self.register_buffer('fixed_embedding', fixed_embedding)
        self.fixed_embedding = fixed_embedding

        low_p_dim = word_dim + word_dim + args['pos_dim'] + args['ner_dim'] + 4
        low_q_dim = word_dim + args['pos_dim'] + args['ner_dim']
        if self.use_xl:
            low_p_dim += xl_dim
            low_q_dim += xl_dim

        self.emb_char = Embedding(word_dim, char_dim, hidden_size)

        ## Forward Layers Declaration
        high_p_dim = 2 * hidden_size
        full_q_dim = 2 * high_p_dim
        attention_dim = word_dim + full_q_dim
        if self.use_xl:
            attention_dim += xl_dim

        self.word_attention_layer = WordAttention(word_dim, attention_size,
                                                  dropout)

        self.low_rnn = StackedPaddedRNN(low_p_dim,
                                        hidden_size,
                                        1,
                                        dropout=dropout)
        self.high_rnn = StackedPaddedRNN(high_p_dim,
                                         hidden_size,
                                         1,
                                         dropout=dropout)
        self.full_rnn = StackedPaddedRNN(full_q_dim,
                                         hidden_size,
                                         1,
                                         dropout=dropout)

        self.low_attention_layer = MultiAttention(attention_dim,
                                                  attention_size, dropout)
        self.high_attention_layer = MultiAttention(attention_dim,
                                                   attention_size, dropout)
        self.full_attention_layer = MultiAttention(attention_dim,
                                                   attention_size, dropout)

        ## Fusion Layer and Final Attention + Final RNN
        fuse_dim = 10 * hidden_size
        self_attention_dim = 12 * hidden_size + word_dim + ner_dim + pos_dim + 1
        if self.use_xl:
            self_attention_dim += xl_dim

        self.fuse_rnn = StackedPaddedRNN(fuse_dim,
                                         hidden_size,
                                         1,
                                         dropout=dropout)
        self.self_attention_layer = MultiAttention(self_attention_dim,
                                                   attention_size, dropout)
        self.self_rnn = StackedPaddedRNN(4 * hidden_size,
                                         hidden_size,
                                         1,
                                         dropout=dropout)

        ## Verifier and output
        self.summ_layer = PointerS(2 * hidden_size,
                                   dropout=dropout,
                                   use_cuda=self.use_cuda)
        self.summ_layer2 = PointerS(2 * hidden_size,
                                    dropout=dropout,
                                    use_cuda=self.use_cuda)
        self.pointer_layer = PointerNet(2 * hidden_size,
                                        use_cuda=self.use_cuda)
        self.has_ans = nn.Sequential(nn.Dropout(p=dropout),
                                     nn.Linear(6 * hidden_size, 2))
Example #24
def build_sequence_adversarial_model(params, mappings):
    # construct word embedding layers
    target_word_embedding = Embedding(params['word_vocab_size'],
                                      params['word_dim'])
    related_word_embedding = Embedding(params['bi_word_vocab_size'],
                                       params['word_dim'])
    load_pretrained(target_word_embedding.emb, mappings['id_to_word'],
                    params['target_emb'])
    load_pretrained(related_word_embedding.emb, mappings['bi_id_to_word'],
                    params['related_emb'])

    # char embedding layer
    char_embedding = Embedding(params['char_vocab_size'], params['char_dim'])

    # CNN and concatenate with word for target language
    target_char_cnn_word = CharCnnWordEmb(params['word_dim'],
                                          params['char_dim'],
                                          params['char_conv'],
                                          params['max_word_length'],
                                          params['filter_withs'])

    # CNN and concatenate with word for related language
    related_char_cnn_word = CharCnnWordEmb(params['word_dim'],
                                           params['char_dim'],
                                           params['char_conv'],
                                           params['max_word_length'],
                                           params['filter_withs'])

    # sequence encoder
    adv_lstm = EncodeLstm(params['char_cnn_word_dim'],
                          params['char_cnn_word_dim'],
                          bidrection=True,
                          dropout=params['dropout'])

    # sequence discriminator
    seq_discriminator = CnnDiscriminator(params['char_cnn_word_dim'] * 2,
                                         params['word_lstm_dim'], [2, 3], 1)

    # context encoder
    context_lstm = EncodeLstm(params['char_cnn_word_dim'] * 2,
                              params['word_lstm_dim'],
                              dropout=params['dropout'])
    # linear projection
    linear_proj = LinearProj(params['word_lstm_dim'] * 2,
                             params['word_lstm_dim'], params['label_size'])

    tagger_criterion = CRFLoss(params['label_size'])

    dis_criterion = nn.NLLLoss()

    if params['gpu']:
        target_word_embedding = target_word_embedding.cuda()
        related_word_embedding = related_word_embedding.cuda()
        char_embedding = char_embedding.cuda()
        target_char_cnn_word = target_char_cnn_word.cuda()
        related_char_cnn_word = related_char_cnn_word.cuda()
        adv_lstm = adv_lstm.cuda()
        seq_discriminator = seq_discriminator.cuda()
        context_lstm = context_lstm.cuda()
        linear_proj = linear_proj.cuda()
        tagger_criterion = tagger_criterion.cuda()
        dis_criterion = dis_criterion.cuda()

    return target_word_embedding, related_word_embedding, char_embedding, target_char_cnn_word, \
        related_char_cnn_word, adv_lstm, seq_discriminator, context_lstm, linear_proj, \
        tagger_criterion, dis_criterion