def prep_model(glove, vocab, dropout=1/2, dropout_w=0, dropout_in=4/5, l2reg=1e-4,
               cnnact='tanh', cnninit='glorot_uniform', cdim={1: 1, 2: 1/2, 3: 1/2, 4: 1/2, 5: 1/2},
               project=True, pdim=2.5,
               ptscorer=B.mlp_ptscorer, mlpsum='sum', Ddim=1,
               oact='sigmoid'):
    model = Graph()
    N = B.embedding(model, glove, vocab, s0pad, s1pad, dropout, dropout_w)
    if dropout_in is None:
        dropout_in = dropout

    Nc = B.cnnsum_input(model, N, s0pad, dropout=dropout_in, l2reg=l2reg,
                        cnninit=cnninit, cnnact=cnnact, cdim=cdim)

    # Projection
    if project:
        model.add_shared_node(name='proj', inputs=['e0s_', 'e1s_'], outputs=['e0p', 'e1p'],
                              layer=Dense(input_dim=Nc, output_dim=int(N*pdim), W_regularizer=l2(l2reg)))
        # This dropout is controversial; it might be harmful to apply,
        # or at least isn't a clear win.
        # model.add_shared_node(name='projdrop', inputs=['e0p', 'e1p'], outputs=['e0p_', 'e1p_'],
        #                       layer=Dropout(dropout_in, input_shape=(N,)))
        # final_outputs = ['e0p_', 'e1p_']
        final_outputs = ['e0p', 'e1p']
    else:
        final_outputs = ['e0s_', 'e1s_']

    # Measurement
    kwargs = dict()
    if ptscorer == B.mlp_ptscorer:
        kwargs['sum_mode'] = mlpsum
    model.add_node(name='scoreS', input=ptscorer(model, final_outputs, Ddim, N, l2reg, **kwargs),
                   layer=Activation(oact))
    model.add_output(name='score', input='scoreS')

    return model


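# Hedged usage sketch for the Graph-based scorer above: assumes the old
# Keras 0.x Graph API (compile takes a loss dict keyed by output name, fit
# takes a single dict keyed by input/output names) and that the surrounding
# tooling supplies `glove`, `vocab` and a training dict `gr` carrying the
# 'si0'/'si1' padded index arrays and 'score' labels -- all of these are
# placeholder names, not defined in this file.
def _example_train(glove, vocab, gr):
    model = prep_model(glove, vocab)
    model.compile(optimizer='adam', loss={'score': 'binary_crossentropy'})
    model.fit(gr, batch_size=160, nb_epoch=2)
    return model

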
def prep_model(self, module_prep_model):
    # Input embedding and encoding
    model = Graph()
    N = B.embedding(model, self.emb, self.vocab, self.s0pad, self.s1pad,
                    self.c['inp_e_dropout'], self.c['inp_w_dropout'], add_flags=self.c['e_add_flags'])

    # Sentence-aggregate embeddings
    final_outputs = module_prep_model(model, N, self.s0pad, self.s1pad, self.c)

    # Measurement
    if self.c['ptscorer'] == '1':
        # special scoring mode just based on the answer
        # (assuming that the question match is carried over to the answer
        # via attention or another mechanism)
        ptscorer = B.cat_ptscorer
        final_outputs = [final_outputs[1]]
    else:
        ptscorer = self.c['ptscorer']

    kwargs = dict()
    if ptscorer == B.mlp_ptscorer:
        kwargs['sum_mode'] = self.c['mlpsum']
    model.add_node(name='scoreS0', input=ptscorer(model, final_outputs, self.c['Ddim'], N, self.c['l2reg'], pfx="out0", **kwargs),
                   layer=Activation('sigmoid'))
    model.add_node(name='scoreS1', input=ptscorer(model, final_outputs, self.c['Ddim'], N, self.c['l2reg'], pfx="out1", **kwargs),
                   layer=Activation('sigmoid'))
    model.add_node(name='scoreS2', input=ptscorer(model, final_outputs, self.c['Ddim'], N, self.c['l2reg'], pfx="out2", **kwargs),
                   layer=Activation('sigmoid'))
    model.add_node(name='scoreV', inputs=['scoreS0', 'scoreS1', 'scoreS2'], merge_mode='concat',
                   layer=Activation('softmax'))
    model.add_output(name='score', input='scoreV')

    return model


def prep_model(glove, vocab, module_prep_model, c, spad=spad):
    # Input embedding and encoding
    model = Graph()
    N = B.embedding(model, glove, vocab, spad, spad,
                    c['inp_e_dropout'], c['inp_w_dropout'], add_flags=c['e_add_flags'])

    # Sentence-aggregate embeddings
    final_outputs = module_prep_model(model, N, spad, spad, c)

    # Measurement
    kwargs = dict()
    if c['ptscorer'] == B.mlp_ptscorer:
        kwargs['sum_mode'] = c['mlpsum']
    model.add_node(name='scoreS0', input=c['ptscorer'](model, final_outputs, c['Ddim'], N, c['l2reg'], pfx="out0", **kwargs),
                   layer=Activation('sigmoid'))
    model.add_node(name='scoreS1', input=c['ptscorer'](model, final_outputs, c['Ddim'], N, c['l2reg'], pfx="out1", **kwargs),
                   layer=Activation('sigmoid'))
    model.add_node(name='scoreS2', input=c['ptscorer'](model, final_outputs, c['Ddim'], N, c['l2reg'], pfx="out2", **kwargs),
                   layer=Activation('sigmoid'))
    model.add_node(name='scoreV', inputs=['scoreS0', 'scoreS1', 'scoreS2'], merge_mode='concat',
                   layer=Activation('softmax'))
    model.add_output(name='score', input='scoreV')

    return model


def prep_model(self, module_prep_model, oact='sigmoid'):
    # Input embedding and encoding
    model = Graph()
    N = B.embedding(model, self.emb, self.vocab, self.s0pad, self.s1pad,
                    self.c['inp_e_dropout'], self.c['inp_w_dropout'], add_flags=self.c['e_add_flags'])

    # Sentence-aggregate embeddings
    final_outputs = module_prep_model(model, N, self.s0pad, self.s1pad, self.c)

    # Measurement
    if self.c['ptscorer'] == '1':
        # special scoring mode just based on the answer
        # (assuming that the question match is carried over to the answer
        # via attention or another mechanism)
        ptscorer = B.cat_ptscorer
        final_outputs = [final_outputs[1]]
    else:
        ptscorer = self.c['ptscorer']

    kwargs = dict()
    if ptscorer == B.mlp_ptscorer:
        kwargs['sum_mode'] = self.c['mlpsum']
    if self.c['f_add_kw']:
        model.add_input('kw', input_shape=(1,))
        model.add_input('akw', input_shape=(1,))
        kwargs['extra_inp'] = ['kw', 'akw']
    model.add_node(name='scoreS', input=ptscorer(model, final_outputs, self.c['Ddim'], N, self.c['l2reg'], **kwargs),
                   layer=Activation(oact))
    model.add_output(name='score', input='scoreS')

    return model


def prep_model(self, module_prep_model):
    # Input embedding and encoding
    model = Graph()
    N = B.embedding(model, self.emb, self.vocab, self.s0pad, self.s1pad,
                    self.c['inp_e_dropout'], self.c['inp_w_dropout'], add_flags=self.c['e_add_flags'])

    # Sentence-aggregate embeddings
    final_outputs = module_prep_model(model, N, self.s0pad, self.s1pad, self.c)

    # Measurement
    if self.c['ptscorer'] == '1':
        # special scoring mode just based on the answer
        # (assuming that the question match is carried over to the answer
        # via attention or another mechanism)
        ptscorer = B.cat_ptscorer
        final_outputs = [final_outputs[1]]
    else:
        ptscorer = self.c['ptscorer']

    kwargs = dict()
    if ptscorer == B.mlp_ptscorer:
        kwargs['sum_mode'] = self.c['mlpsum']
        kwargs['Dinit'] = self.c['Dinit']
    model.add_node(name='scoreS', input=ptscorer(model, final_outputs, self.c['Ddim'], N, self.c['l2reg'], **kwargs),
                   layer=Activation('linear'))
    model.add_node(name='out', input='scoreS',
                   layer=Dense(6, W_regularizer=l2(self.c['l2reg'])))
    model.add_node(name='outS', input='out', layer=Activation('softmax'))
    model.add_output(name='classes', input='outS')

    return model


def prep_model(glove, vocab, module_prep_model, c, spad=spad):
    # Input embedding and encoding
    model = Graph()
    N = B.embedding(model, glove, vocab, spad, spad,
                    c['inp_e_dropout'], c['inp_w_dropout'], add_flags=c['e_add_flags'])

    # Sentence-aggregate embeddings
    final_outputs = module_prep_model(model, N, spad, spad, c)

    # Measurement
    kwargs = dict()
    if c['ptscorer'] == B.mlp_ptscorer:
        kwargs['sum_mode'] = c['mlpsum']
    model.add_node(name='scoreS', input=c['ptscorer'](model, final_outputs, c['Ddim'], N, c['l2reg'], **kwargs),
                   layer=Activation('linear'))
    model.add_node(name='out', input='scoreS',
                   layer=Dense(6, W_regularizer=l2(c['l2reg'])))
    model.add_node(name='outS', input='out', layer=Activation('softmax'))
    model.add_output(name='classes', input='outS')

    return model


def prep_model(glove, vocab, module_prep_model, c, oact):
    # Input embedding and encoding
    model = Graph()
    N = B.embedding(model, glove, vocab, s0pad, s1pad, c['inp_e_dropout'])

    # Sentence-aggregate embeddings
    final_outputs = module_prep_model(model, N, s0pad, s1pad, c)

    # Measurement
    if c['ptscorer'] == '1':
        # special scoring mode just based on the answer
        # (assuming that the question match is carried over to the answer
        # via attention or another mechanism)
        ptscorer = B.cat_ptscorer
        final_outputs = [final_outputs[1]]
    else:
        ptscorer = c['ptscorer']

    kwargs = dict()
    if ptscorer == B.mlp_ptscorer:
        kwargs['sum_mode'] = c['mlpsum']
    model.add_node(name='scoreS', input=ptscorer(model, final_outputs, c['Ddim'], N, c['l2reg'], **kwargs),
                   layer=Activation(oact))
    model.add_output(name='score', input='scoreS')

    return model


def prep_model(glove, vocab, module_prep_model, c, oact, s0pad, s1pad):
    # Input embedding and encoding
    model = Graph()
    N = B.embedding(model, glove, vocab, s0pad, s1pad,
                    c['inp_e_dropout'], c['inp_w_dropout'], add_flags=c['e_add_flags'])

    # Sentence-aggregate embeddings
    final_outputs = module_prep_model(model, N, s0pad, s1pad, c)

    # Measurement
    if c['ptscorer'] == '1':
        # special scoring mode just based on the answer
        # (assuming that the question match is carried over to the answer
        # via attention or another mechanism)
        ptscorer = B.cat_ptscorer
        final_outputs = [final_outputs[1]]
    else:
        ptscorer = c['ptscorer']

    kwargs = dict()
    if ptscorer == B.mlp_ptscorer:
        kwargs['sum_mode'] = c['mlpsum']
    model.add_node(name='scoreS', input=ptscorer(model, final_outputs, c['Ddim'], N, c['l2reg'], **kwargs),
                   layer=Activation(oact))
    model.add_output(name='score', input='scoreS')

    return model


def prep_model(self, module_prep_model):
    # Input embedding and encoding
    model = Graph()
    N = B.embedding(model, self.emb, self.vocab, self.s0pad, self.s1pad,
                    self.c['inp_e_dropout'], self.c['inp_w_dropout'], add_flags=self.c['e_add_flags'])

    # Sentence-aggregate embeddings
    final_outputs = module_prep_model(model, N, self.s0pad, self.s1pad, self.c)

    # Measurement
    if self.c['ptscorer'] == '1':
        # special scoring mode just based on the answer
        # (assuming that the question match is carried over to the answer
        # via attention or another mechanism)
        ptscorer = B.cat_ptscorer
        final_outputs = [final_outputs[1]]
    else:
        ptscorer = self.c['ptscorer']

    kwargs = dict()
    if ptscorer == B.mlp_ptscorer:
        kwargs['sum_mode'] = self.c['mlpsum']
    model.add_node(name='scoreS', input=ptscorer(model, final_outputs, self.c['Ddim'], N, self.c['l2reg'], **kwargs),
                   layer=Activation('linear'))
    model.add_node(name='out', input='scoreS',
                   layer=Dense(6, W_regularizer=l2(self.c['l2reg'])))
    model.add_node(name='outS', input='out', layer=Activation('softmax'))
    model.add_output(name='classes', input='outS')

    return model


def prep_model(self, module_prep_model):
    # Input embedding and encoding

    # model inputs
    si0 = Input(name='si0', shape=(self.s0pad,), dtype='int32')
    se0 = Input(name='se0', shape=(self.s0pad, self.emb.N))
    si1 = Input(name='si1', shape=(self.s1pad,), dtype='int32')
    se1 = Input(name='se1', shape=(self.s1pad, self.emb.N))
    inputs = [si0, se0, si1, se1]
    if self.c['e_add_flags']:
        f0 = Input(name='f0', shape=(self.s0pad, nlp.flagsdim))
        f1 = Input(name='f1', shape=(self.s1pad, nlp.flagsdim))
        inputs = [si0, se0, si1, se1, f0, f1]

    # embedding block
    embedded, N_emb = B.embedding(inputs, self.emb, self.vocab, self.s0pad, self.s1pad,
                                  self.c['inp_e_dropout'], self.c['inp_w_dropout'],
                                  add_flags=self.c['e_add_flags'])

    # Sentence-aggregate embeddings
    # final_outputs are two vectors representing s1 and s2
    final_outputs, N = module_prep_model(embedded, N_emb, self.s0pad, self.s1pad, self.c)

    if len(final_outputs) == 1:
        out = Dense(6, kernel_regularizer=l2(self.c['l2reg']))(final_outputs[0])
        outS = Activation('softmax')(out)
        return Model(inputs=inputs, outputs=outS)

    # Measurement
    ptscorer = self.c['ptscorer']
    kwargs = dict()
    if ptscorer == B.mlp_ptscorer:
        kwargs['sum_mode'] = self.c['mlpsum']
        kwargs['Dinit'] = self.c['Dinit']
    scoreS = Activation('linear')(ptscorer(final_outputs, self.c['Ddim'], N, self.c['l2reg'], **kwargs))
    out = Dense(6, kernel_regularizer=l2(self.c['l2reg']))(scoreS)
    outS = Activation('softmax')(out)
    model = Model(inputs=inputs, outputs=outS)

    return model


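# Hedged usage sketch for the functional-API variant above: assumes Keras 2
# Model.compile/.fit, a dict `gr` mapping the Input names ('si0', 'se0', ...)
# to padded arrays, and 6-way one-hot targets `y` matching the Dense(6) head;
# `task` stands in for the object carrying emb/vocab/config. All of these
# names are illustrative placeholders, not defined in this file.
def _example_train_sts(task, module_prep_model, gr, y):
    model = task.prep_model(module_prep_model)
    model.compile(optimizer='adam', loss='categorical_crossentropy')
    model.fit(gr, y, batch_size=160, epochs=2)
    return model

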
def _prep_model(model, glove, vocab, module_prep_model, c, oact, s0pad, s1pad, rnn_dim):
    # Input embedding and encoding
    N = embedding(model, glove, vocab, s0pad, s1pad,
                  c['inp_e_dropout'], c['w_dropout'], add_flags=c['e_add_flags'],
                  create_inputs=False)

    # Sentence-aggregate embeddings
    final_outputs = module_prep_model(model, N, s0pad, s1pad, c)

    if c['ptscorer'] is None:
        model.add_node(name='scoreS1', input=final_outputs[0],
                       layer=Dense(rnn_dim, activation=oact))
        model.add_node(name='scoreS2', input=final_outputs[1],
                       layer=Dense(rnn_dim, activation=oact))
    else:
        next_input = c['ptscorer'](model, final_outputs, c['Ddim'], N, c['l2reg'], pfx='S1_')
        model.add_node(name='scoreS1', input=next_input, layer=Activation(oact))
        model.add_node(name='scoreS2', input=next_input, layer=Activation(oact))


def prep_model(self, module_prep_model, oact="sigmoid"): # Input embedding and encoding model = Graph() N = B.embedding( model, self.emb, self.vocab, self.s0pad, self.s1pad, self.c["inp_e_dropout"], self.c["inp_w_dropout"], add_flags=self.c["e_add_flags"], ) # Sentence-aggregate embeddings final_outputs = module_prep_model(model, N, self.s0pad, self.s1pad, self.c) # Measurement if self.c["ptscorer"] == "1": # special scoring mode just based on the answer # (assuming that the question match is carried over to the answer # via attention or another mechanism) ptscorer = B.cat_ptscorer final_outputs = [final_outputs[1]] else: ptscorer = self.c["ptscorer"] kwargs = dict() if ptscorer == B.mlp_ptscorer: kwargs["sum_mode"] = self.c["mlpsum"] kwargs["Dinit"] = self.c["Dinit"] if "f_add" in self.c: for inp in self.c["f_add"]: model.add_input(inp, input_shape=(1,)) # assumed scalar kwargs["extra_inp"] = self.c["f_add"] model.add_node( name="scoreS", input=ptscorer(model, final_outputs, self.c["Ddim"], N, self.c["l2reg"], **kwargs), layer=Activation(oact), ) model.add_output(name="score", input="scoreS") return model
def _prep_model(model, glove, vocab, module_prep_model, c, oact, s0pad, s1pad, rnn_dim):
    # Input embedding and encoding
    N = B.embedding(model, glove, vocab, s0pad, s1pad,
                    c['inp_e_dropout'], c['w_dropout'], add_flags=c['e_add_flags'],
                    create_inputs=False)

    # Sentence-aggregate embeddings
    final_outputs = module_prep_model(model, N, s0pad, s1pad, c)

    if c['ptscorer'] is None:
        model.add_node(name='scoreS1', input=final_outputs[0],
                       layer=Dense(rnn_dim, activation=oact))
        model.add_node(name='scoreS2', input=final_outputs[1],
                       layer=Dense(rnn_dim, activation=oact))
    else:
        next_input = c['ptscorer'](model, final_outputs, c['Ddim'], N, c['l2reg'], pfx='S1_')
        model.add_node(name='scoreS1', input=next_input, layer=Activation(oact))
        next_input = c['ptscorer'](model, final_outputs, c['Ddim'], N, c['l2reg'], pfx='S2_')
        model.add_node(name='scoreS2', input=next_input, layer=Activation(oact))


def prep_model(glove, vocab, module_prep_model, c):
    # Input embedding and encoding
    model = Graph()
    N = B.embedding(model, glove, vocab, spad, spad,
                    c['inp_e_dropout'], c['inp_w_dropout'], add_flags=c['e_add_flags'])

    # Sentence-aggregate embeddings
    final_outputs = module_prep_model(model, N, spad, spad, c)

    # Measurement
    kwargs = dict()
    if c['ptscorer'] == B.mlp_ptscorer:
        kwargs['sum_mode'] = c['mlpsum']
    model.add_node(name='scoreS', input=c['ptscorer'](model, final_outputs, c['Ddim'], N, c['l2reg'], **kwargs),
                   layer=Activation('linear'))
    model.add_node(name='out', input='scoreS',
                   layer=Dense(6, W_regularizer=l2(c['l2reg'])))
    model.add_node(name='outS', input='out', layer=Activation('softmax'))
    model.add_output(name='classes', input='outS')

    return model


def _prep_model(model, glove, vocab, module_prep_model, c, oact, s0pad, s1pad, rnn_dim, make_S1, make_S2):
    if not make_S1 and not make_S2:
        return

    # Input embedding and encoding
    N = B.embedding(model, glove, vocab, s0pad, s1pad,
                    c['inp_e_dropout'], c['inp_w_dropout'], add_flags=c['e_add_flags'],
                    create_inputs=False)

    # Sentence-aggregate embeddings
    final_outputs = module_prep_model(model, N, s0pad, s1pad, c)

    kwargs_S1 = dict()
    kwargs_S2 = dict()
    if c['ptscorer'] == B.mlp_ptscorer:
        kwargs_S1['sum_mode'] = c['mlpsum']
        kwargs_S2['sum_mode'] = c['mlpsum']
        kwargs_S1['Dinit'] = c['Dinit']
        kwargs_S2['Dinit'] = c['Dinit']
    if 'f_add_S1' in c:
        kwargs_S1['extra_inp'] = c['f_add_S1']
    if 'f_add_S2' in c:
        kwargs_S2['extra_inp'] = c['f_add_S2']

    if c['ptscorer'] == '1':
        if 'extra_inp' in kwargs_S1 or 'extra_inp' in kwargs_S2:
            print("Warning: Ignoring extra_inp with ptscorer '1'")
        if make_S1:
            model.add_node(name='scoreS1', input=final_outputs[1],
                           layer=Dense(rnn_dim, activation=oact, W_regularizer=l2(c['l2reg'])))
        if make_S2:
            model.add_node(name='scoreS2', input=final_outputs[1],
                           layer=Dense(rnn_dim, activation=oact, W_regularizer=l2(c['l2reg'])))
    else:
        if make_S1:
            next_input = c['ptscorer'](model, final_outputs, c['Ddim'], N, c['l2reg'], pfx='S1_', **kwargs_S1)
            model.add_node(name='scoreS1', input=next_input, layer=Activation(oact))
        if make_S2:
            next_input = c['ptscorer'](model, final_outputs, c['Ddim'], N, c['l2reg'], pfx='S2_', **kwargs_S2)
            model.add_node(name='scoreS2', input=next_input, layer=Activation(oact))


def prep_model(self, module_prep_model, oact='sigmoid'):
    """
    # Input embedding and encoding
    model = Sequential()
    N = B.embedding(model, self.emb, self.vocab, self.s0pad, self.s1pad,
                    self.c['inp_e_dropout'], self.c['inp_w_dropout'], add_flags=self.c['e_add_flags'])

    # Sentence-aggregate embeddings
    final_outputs = module_prep_model(model, N, self.s0pad, self.s1pad, self.c)

    # Measurement
    """
    si0 = Input(name='si0', shape=(self.s0pad,), dtype='int32')
    se0 = Input(name='se0', shape=(self.s0pad, self.emb.N))
    si1 = Input(name='si1', shape=(self.s1pad,), dtype='int32')
    se1 = Input(name='se1', shape=(self.s1pad, self.emb.N))
    inputs = [si0, se0, si1, se1]
    if self.c['e_add_flags']:
        f0 = Input(name='f0', shape=(self.s0pad, nlp.flagsdim))
        f1 = Input(name='f1', shape=(self.s1pad, nlp.flagsdim))
        inputs = [si0, se0, si1, se1, f0, f1]

    # embedding block
    embedded, N_emb = B.embedding(inputs, self.emb, self.vocab, self.s0pad, self.s1pad,
                                  self.c['inp_e_dropout'], self.c['inp_w_dropout'],
                                  add_flags=self.c['e_add_flags'])

    # Sentence-aggregate embeddings
    # final_outputs are two vectors representing s1 and s2
    final_outputs, N = module_prep_model(embedded, N_emb, self.s0pad, self.s1pad, self.c)

    if self.c['ptscorer'] == '1':
        # special scoring mode just based on the answer
        # (assuming that the question match is carried over to the answer
        # via attention or another mechanism)
        ptscorer = B.cat_ptscorer
        final_outputs = [final_outputs[1]]
    else:
        ptscorer = self.c['ptscorer']

    kwargs = dict()
    if ptscorer == B.mlp_ptscorer:
        kwargs['sum_mode'] = self.c['mlpsum']
        kwargs['Dinit'] = self.c['Dinit']
    if 'f_add' in self.c:  # TODO
        for inp in self.c['f_add']:
            model.add_input(inp, input_shape=(1,))  # assumed scalar
        kwargs['extra_inp'] = self.c['f_add']
    scoreS = Activation(oact)(ptscorer(final_outputs, self.c['Ddim'], N, self.c['l2reg'], **kwargs))
    model = Model(inputs=inputs, outputs=scoreS)
    """
    model.add_node(name='scoreS', input=ptscorer(model, final_outputs, self.c['Ddim'], N, self.c['l2reg'], **kwargs),
                   layer=Activation(oact))
    model.add_output(name='score', input='scoreS')
    """
    return model


def prep_model(glove, vocab, dropout=3/4, dropout_in=None, l2reg=1e-4,
               rnnbidi=True, rnn=GRU, rnnbidi_mode='sum', rnnact='tanh', rnninit='glorot_uniform',
               sdim=2, rnnlevels=1,
               pool_layer=MaxPooling1D, cnnact='tanh', cnninit='glorot_uniform', cdim=2, cfiltlen=3,
               project=True, adim=1/2, attn_mode='sum', fact='softmax',
               ptscorer=B.mlp_ptscorer, mlpsum='sum', Ddim=2,
               oact='sigmoid'):
    model = Graph()
    N = B.embedding(model, glove, vocab, s0pad, s1pad, dropout, dropout_w=.5)  # fix
    if dropout_in is None:
        dropout_in = dropout

    # FIXME: pool_layer=None is in fact not supported, since this RNN
    # would return a scalar for e1s too; instead, we'll need to manually
    # pick the first & last element of the returned sequence from e0s
    B.rnn_input(model, N, s0pad, return_sequences=(pool_layer is not None),
                rnnlevels=rnnlevels, dropout=dropout_in, sdim=sdim,
                rnnbidi=rnnbidi, rnnbidi_mode=rnnbidi_mode,
                rnn=rnn, rnnact=rnnact, rnninit=rnninit)

    # Generate e0s aggregate embedding
    e0_aggreg, gwidth = aggregate(model, 'e0s_', 'e0', N, s0pad, pool_layer,
                                  dropout=dropout_in, l2reg=l2reg, sdim=sdim,
                                  cnnact=cnnact, cdim=cdim, cfiltlen=cfiltlen,
                                  project=project)

    if project:
        # ...and re-embed e0, e1 in attention space
        awidth = int(N*adim)
        model.add_node(name='e0a', input=e0_aggreg,
                       layer=Dense(input_dim=gwidth, output_dim=awidth, W_regularizer=l2(l2reg)))
        e0_aggreg_attn = 'e0a'

        model.add_node(name='e1sa_', input='e1s',
                       layer=TimeDistributedDense(input_dim=int(N*sdim), output_dim=awidth, W_regularizer=l2(l2reg)))
        # XXX: this dummy works around a mysterious theano error
        model.add_node(name='e1sa', input='e1sa_', layer=Activation('linear'))
        e1_attn = 'e1sa'
    else:
        e1_attn = 'e1s'
        e0_aggreg_attn = e0_aggreg

    # Now, build an attention function f(e0a, e1sa) -> e1a, producing an
    # (s1pad,) vector of scalars denoting the attention for each e1 token
    model.add_node(name='e0sa', input=e0_aggreg_attn, layer=RepeatVector(s1pad))
    if attn_mode == 'dot' or attn_mode == 'cos':
        # model attention by dot-product, i.e. similarity measure of question
        # aggregate and answer token in attention space
        model.add_node(name='e1a[1]',
                       layer=B.dot_time_distributed_merge(model, ['e0sa', e1_attn], cos_norm=(attn_mode == 'cos')))
    else:
        # traditional attention model from Hermann et al., 2015 and Tan et al., 2015
        # we want to model attention as w*tanh(e0a + e1sa[i])
        model.add_node(name='e1a[0]', inputs=['e0sa', e1_attn], merge_mode='sum',
                       layer=Activation('tanh'))
        model.add_node(name='e1a[1]', input='e1a[0]',
                       layer=TimeDistributedDense(input_dim=awidth, output_dim=1, W_regularizer=l2(l2reg)))
    model.add_node(name='e1a[2]', input='e1a[1]',
                   layer=Flatten(input_shape=(s1pad, 1)))

    # *Focus* e1 by softmaxing (by default) attention and multiplying tokens
    # by their attention.
    model.add_node(name='e1a[3]', input='e1a[2]', layer=Activation(fact))
    model.add_node(name='e1a[4]', input='e1a[3]', layer=RepeatVector(int(N*sdim)))
    model.add_node(name='e1a', input='e1a[4]', layer=Permute((2, 1)))
    model.add_node(name='e1sm', inputs=['e1s_', 'e1a'], merge_mode='mul', layer=Activation('linear'))

    # Generate e1sm aggregate embedding
    e1_aggreg, gwidth = aggregate(model, 'e1sm', 'e1', N, s1pad, pool_layer,
                                  dropout=dropout_in, l2reg=l2reg, sdim=sdim,
                                  cnnact=cnnact, cdim=cdim, cfiltlen=cfiltlen,
                                  project=project)

    if ptscorer == '1':
        # special scoring mode just based on the answer
        # (assuming that the question match is carried by the attention)
        ptscorer = B.cat_ptscorer
        final_outputs = [e1_aggreg]
    else:
        final_outputs = [e0_aggreg, e1_aggreg]

    # Measurement
    kwargs = dict()
    if ptscorer == B.mlp_ptscorer:
        kwargs['sum_mode'] = mlpsum
    model.add_node(name='scoreS', input=ptscorer(model, final_outputs, Ddim, N, l2reg, **kwargs),
                   layer=Activation(oact))
    model.add_output(name='score', input='scoreS')

    return model


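# Hedged numpy illustration (not part of the model code): what the
# Activation(fact) -> RepeatVector -> Permute -> mul chain above computes for
# a single sentence -- softmax the per-token attention scalars, then scale
# every token embedding by its attention weight. Shapes and values here are
# made up for demonstration.
def _focus_sketch():
    import numpy as np
    s1pad, width = 4, 3                               # tokens, embedding width
    e1s = np.arange(s1pad * width, dtype=float).reshape(s1pad, width)
    scores = np.array([0.1, 2.0, 0.3, 0.5])           # one scalar per e1 token
    attn = np.exp(scores) / np.exp(scores).sum()      # fact='softmax'
    # RepeatVector(int(N*sdim)) + Permute((2, 1)) amounts to broadcasting the
    # attention column across the embedding dimensions:
    e1sm = e1s * attn[:, None]                        # focused e1 tokens
    return e1sm

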
def prep_model(self, module_prep_model):
    # Input embedding and encoding
    model = Graph()
    N = B.embedding(model, self.emb, self.vocab, self.s0pad, self.s1pad,
                    self.c['inp_e_dropout'], self.c['inp_w_dropout'], add_flags=self.c['e_add_flags'])

    # Sentence-aggregate embeddings
    final_outputs = module_prep_model(model, N, self.s0pad, self.s1pad, self.c)

    # Measurement
    if self.c['ptscorer'] == '1':
        # special scoring mode just based on the answer
        # (assuming that the question match is carried over to the answer
        # via attention or another mechanism)
        ptscorer = B.cat_ptscorer
        final_outputs = [final_outputs[1]]
    else:
        ptscorer = self.c['ptscorer']

    kwargs = dict()
    if ptscorer == B.mlp_ptscorer:
        kwargs['sum_mode'] = self.c['mlpsum']
        kwargs['Dinit'] = self.c['Dinit']
    model.add_node(name='scoreS0', input=ptscorer(model, final_outputs, self.c['Ddim'], N, self.c['l2reg'], pfx="out0", **kwargs),
                   layer=Activation('sigmoid'))
    model.add_node(name='scoreS1', input=ptscorer(model, final_outputs, self.c['Ddim'], N, self.c['l2reg'], pfx="out1", **kwargs),
                   layer=Activation('sigmoid'))
    model.add_node(name='scoreS2', input=ptscorer(model, final_outputs, self.c['Ddim'], N, self.c['l2reg'], pfx="out2", **kwargs),
                   layer=Activation('sigmoid'))
    model.add_node(name='scoreV', inputs=['scoreS0', 'scoreS1', 'scoreS2'], merge_mode='concat',
                   layer=Activation('softmax'))
    model.add_output(name='score', input='scoreV')

    return model


def prep_model(self, module_prep_model):
    # Input embedding and encoding

    # model inputs
    si0 = Input(name='si0', shape=(self.s0pad,), dtype='int32')
    se0 = Input(name='se0', shape=(self.s0pad, self.emb.N))
    si1 = Input(name='si1', shape=(self.s1pad,), dtype='int32')
    se1 = Input(name='se1', shape=(self.s1pad, self.emb.N))
    inputs = [si0, se0, si1, se1]
    if self.c['e_add_flags']:
        f0 = Input(name='f0', shape=(self.s0pad, nlp.flagsdim))
        f1 = Input(name='f1', shape=(self.s1pad, nlp.flagsdim))
        inputs = [si0, se0, si1, se1, f0, f1]

    # embedding block
    embedding, N_emb = B.embedding(self.emb, self.vocab, self.s0pad, self.s1pad,
                                   self.c['inp_e_dropout'], self.c['inp_w_dropout'],
                                   add_flags=self.c['e_add_flags'])
    embedded = embedding(inputs)
    print(embedding.get_output_shape_at(0))
    print(embedded)
    print(N_emb)

    # Sentence-aggregate embeddings
    # model_block = module_prep_model(N_emb, self.s0pad, self.s1pad, self.c)
    # outputs = model_block(embedded)
    TDLayer = Lambda(function=lambda x: K.mean(x, axis=1),
                     output_shape=lambda shape: (shape[0],) + shape[2:])
    e0b = TDLayer(embedded[0])
    e1b = TDLayer(embedded[1])
    final_outputs = [e0b, e1b]

    # Measurement
    if self.c['ptscorer'] == '1':
        # special scoring mode just based on the answer
        # (assuming that the question match is carried over to the answer
        # via attention or another mechanism)
        ptscorer = B.cat_ptscorer
        final_outputs = [final_outputs[1]]
    else:
        ptscorer = self.c['ptscorer']

    kwargs = dict()
    if ptscorer == B.mlp_ptscorer:
        kwargs['sum_mode'] = self.c['mlpsum']
        kwargs['Dinit'] = self.c['Dinit']
    scoreS = Activation('linear')(ptscorer(final_outputs, self.c['Ddim'], N_emb, self.c['l2reg'], **kwargs))
    out = Dense(6, kernel_regularizer=l2(self.c['l2reg']))(scoreS)
    outS = Activation('softmax')(out)
    model = Model(inputs=inputs, outputs=outS)

    return model