def compare_and_score(self, left, right, ent, feats):
    """Compare both context vectors with the entity vector and reduce.

    For each of ``left`` and ``right`` four comparisons against ``ent``
    are built: a normalized dot product, the difference, the element-wise
    square of that difference and the element-wise product. All eight
    tensors (plus ``feats`` when it is not ``None``) are concatenated
    along axis 1 and fed to ``self.reduce_layer``.

    Returns whatever ``self.reduce_layer`` produces for the concatenated
    comparison tensor.
    """
    sides = (left, right)

    # Each group is built left-first so layer creation order is stable.
    cosines = [layers.Dot(axes=1, normalize=True)([side, ent])
               for side in sides]
    deltas = [layers.Subtract()([side, ent]) for side in sides]
    squared_deltas = [layers.Multiply()([delta, delta]) for delta in deltas]
    products = [layers.Multiply()([side, ent]) for side in sides]

    pieces = cosines + deltas + squared_deltas + products
    if feats is not None:
        pieces.append(feats)

    merged = layers.Concatenate(axis=1)(pieces)
    return self.reduce_layer(merged)
def model(self):
    """Build the trainable pair model and its cosine-similarity twin.

    Both models take two single-index inputs (``in_tgt`` and ``in_ctx``)
    that are looked up in one shared embedding table of size
    ``self.vocab_size`` x ``self.embedding_dim``.

    Returns:
        (model, validation_model): the first ends in a sigmoid unit over
        the raw dot product and is compiled with binary cross-entropy and
        rmsprop; the second outputs the normalized dot product (cosine
        similarity) and is not compiled.
    """
    dim = self.embedding_dim
    target_in = layers.Input((1, ), name='in_tgt')
    context_in = layers.Input((1, ), name='in_ctx')

    shared_embedding = layers.Embedding(self.vocab_size,
                                        dim,
                                        input_length=1,
                                        name='embedding')

    def embed(index_input, layer_name):
        # Look the index up and turn the result into a column vector.
        looked_up = shared_embedding(index_input)
        return layers.Reshape((dim, 1), name=layer_name)(looked_up)

    target_vec = embed(target_in, 'target')
    context_vec = embed(context_in, 'context')

    raw_dot = layers.Dot(axes=1)([target_vec, context_vec])
    raw_dot = layers.Reshape((1, ), name='dot')(raw_dot)
    prediction = layers.Dense(1, activation='sigmoid',
                              name='output')(raw_dot)
    train_model = models.Model(inputs=[target_in, context_in],
                               outputs=prediction)
    train_model.compile(loss='binary_crossentropy', optimizer='rmsprop')

    # The validation model reuses the same vectors but reports their
    # cosine similarity instead of a learned probability.
    cosine = layers.Dot(axes=1, normalize=True)([target_vec, context_vec])
    cosine = layers.Reshape((1, ), name='sim')(cosine)
    validation_model = models.Model(inputs=[target_in, context_in],
                                    outputs=cosine)
    return train_model, validation_model
def build_model(self):
    """Create the Q-network and its action-masked training twin.

    Two models sharing the same hidden layers are stored on ``self``:

    * ``self.q_model`` maps a 4-element state to ``self.num_actions``
      outputs (the plain forward pass).
    * ``self.applied_action_model`` additionally takes an action vector
      and outputs its dot product with the Q output; this is the model
      that gets compiled (SGD, mean squared error).

    NOTE(review): the Q output uses a ReLU activation and the compiled
    model tracks an 'accuracy' metric on an MSE objective - confirm both
    are intended.
    """
    state_in = kl.Input(shape=(4, ))
    action_in = kl.Input(shape=(self.num_actions, ))

    # Three fully connected tanh layers, with dropout after the first.
    hidden = kl.Dense(64, activation='tanh')(state_in)
    hidden = kl.Dropout(0.4)(hidden)
    for width in (32, 16):
        hidden = kl.Dense(width, activation='tanh')(hidden)

    q_values = kl.Dense(self.num_actions, activation='relu')(hidden)

    # Training head: weight the Q outputs by the supplied action vector
    # and sum, so only the selected action contributes to the loss.
    selected_q = kl.Dot(1)([q_values, action_in])

    self.q_model = km.Model(inputs=state_in, outputs=q_values)
    self.applied_action_model = km.Model(inputs=[state_in, action_in],
                                         outputs=selected_q)

    sgd = ko.SGD(lr=1e-5)
    self.applied_action_model.compile(optimizer=sgd,
                                      loss="mean_squared_error",
                                      metrics=['accuracy'])
def test_merge_dot():
    """Check shapes and values of ``layers.dot`` / ``layers.Dot``.

    Covers the functional form with ``axes=1``, the layer form, and the
    functional form with a tuple of negative axes; predictions are
    compared with per-row ``np.dot`` results.
    """
    in_a = layers.Input(shape=(4, ))
    in_b = layers.Input(shape=(4, ))

    merged = layers.dot([in_a, in_b], axes=1)
    assert merged._keras_shape == (None, 1)
    model = models.Model([in_a, in_b], merged)

    dot_layer = layers.Dot(axes=1)
    # Connect the layer to the inputs before querying its output shape.
    dot_layer([in_a, in_b])
    assert dot_layer.output_shape == (None, 1)

    batch_a = np.random.random((2, 4))
    batch_b = np.random.random((2, 4))
    predicted = model.predict([batch_a, batch_b])
    assert predicted.shape == (2, 1)

    expected = np.array([[np.dot(row_a, row_b)]
                         for row_a, row_b in zip(batch_a, batch_b)])
    assert_allclose(predicted, expected, atol=1e-4)

    # Test with negative tuple of axes.
    merged = layers.dot([in_a, in_b], axes=(-1, -1))
    assert merged._keras_shape == (None, 1)
    model = models.Model([in_a, in_b], merged)
    predicted = model.predict([batch_a, batch_b])
    assert predicted.shape == (2, 1)
    assert_allclose(predicted, expected, atol=1e-4)
def create_model(embeddings_matrix, vocab_size, context, response, labels):
    """Build, compile and train the siamese context/response model.

    Context and response sequences go through one shared embedding layer
    (initialised from ``embeddings_matrix``, trainable) and one shared
    LSTM. The two encodings are combined by ``CustomLayer``, dotted with
    the context encoding and squashed by a single sigmoid unit.

    Args:
        embeddings_matrix: initial weights for the embedding layer.
        vocab_size: number of rows of the embedding table.
        context: training inputs for the context branch.
        response: training inputs for the response branch.
        labels: binary training targets.

    Returns:
        The trained Keras model. Earlier versions returned ``None``, so
        the fitted model was lost to the caller.
    """
    context_input = Input(shape=(MAX_SEQUENCE_LEN, ), dtype='float32')
    response_input = Input(shape=(MAX_SEQUENCE_LEN, ), dtype='float32')

    init = RandomUniform(minval=-0.01, maxval=0.01)
    embeddings_layer = Embedding(vocab_size,
                                 WORD_EMBEDDINGS_LEN,
                                 weights=[embeddings_matrix],
                                 input_length=MAX_SEQUENCE_LEN,
                                 trainable=True)
    rnn_layer = layers.LSTM(units=UNITS,
                            kernel_initializer=init,
                            dropout=0.2)

    # Both branches share the embedding and the LSTM weights.
    c_x = rnn_layer(embeddings_layer(context_input))
    r_x = rnn_layer(embeddings_layer(response_input))

    # NOTE(review): the original carried a comment saying this layer
    # "needs to be fixed, multiplication by the context is missing"; the
    # Dot with c_x below may already address that - confirm.
    preds = CustomLayer(output_dim=UNITS)([c_x, r_x])
    preds = layers.Dot(axes=-1)([preds, c_x])
    preds = Dense(1, activation='sigmoid')(preds)

    siamese_model = Model(inputs=[context_input, response_input],
                          outputs=preds)
    op = Adam(lr=0.0001, clipvalue=10.0)
    siamese_model.compile(loss='binary_crossentropy',
                          optimizer=op,
                          metrics=['acc', 'binary_accuracy'])
    siamese_model.summary()
    siamese_model.fit([context, response],
                      labels,
                      batch_size=BATCH_SIZE,
                      epochs=100,
                      validation_split=0.1)
    # Hand the fitted model back so callers can predict with or save it.
    return siamese_model
def factorization_machine(f_size, k_latent=5, embedding_reg=0.0005):
    """Build a factorization-machine style Keras model.

    Args:
        f_size: one entry per input field. A positive entry is used as
            the size of an embedding table for that field; any other
            entry makes the field go through a Dense projection instead.
        k_latent: width of the latent factor of every field.
        embedding_reg: L2 regularisation strength applied to every
            embedding table / Dense kernel.

    Returns:
        An uncompiled model with one ``(1,)`` input per field whose output
        is the sum of the per-field biases and half the sum over fields
        of ``dot(sum_of_factors - factor, factor)``.
    """

    def project(field_input, cardinality, width):
        # Positive cardinality: table lookup flattened to (batch, width).
        if cardinality > 0:
            table = Embedding(cardinality,
                              width,
                              embeddings_regularizer=l2(embedding_reg))
            return Flatten()(table(field_input))
        # Otherwise: regularised linear projection of the raw value.
        return Dense(width,
                     kernel_regularizer=l2(embedding_reg))(field_input)

    num_fields = len(f_size)
    input_x = [Input(shape=(1, )) for _ in range(num_fields)]

    biases = [project(field, size, 1)
              for field, size in zip(input_x, f_size)]
    factors = [project(field, size, k_latent)
               for field, size in zip(input_x, f_size)]

    factor_sum = Add()(factors)
    others = [layers.Subtract()([factor_sum, factor]) for factor in factors]
    interactions = [layers.Dot(axes=1)([other, factor])
                    for other, factor in zip(others, factors)]
    interaction_total = Add()(interactions)
    # Every unordered pair was counted twice above, hence the halving.
    half_interactions = layers.Lambda(lambda t: t / 2)(interaction_total)

    bias_total = Add()(biases)
    prediction = Add()([half_interactions, bias_total])
    return Model(inputs=input_x, outputs=prediction)
def gen_model(n_users, n_items, latent_dim, normalize):
    """Build and compile a biased matrix-factorization rating model.

    Args:
        n_users: number of rows of the user tables.
        n_items: number of rows of the item tables.
        latent_dim: width of the user and item latent vectors.
        normalize: only when this is exactly ``True`` are the latent
            tables left unconstrained; for any other value they get a
            ``non_neg()`` constraint.

    Returns:
        The model (MSE loss, adam) whose output is
        ``dot(user_vec, item_vec) + user_bias + item_bias``. As side
        effects the summary is printed and a diagram is written to
        ``tmp/model.png``.
    """
    user_in = layers.Input(shape=[1])
    item_in = layers.Input(shape=[1])

    def latent_lookup(row_count, layer_name, index_input):
        # A fresh constraint object is created for every constrained table.
        options = {}
        if normalize is not True:
            options['embeddings_constraint'] = non_neg()
        table = layers.Embedding(row_count,
                                 latent_dim,
                                 embeddings_initializer='random_normal',
                                 name=layer_name,
                                 **options)
        return table(index_input)

    def bias_lookup(row_count, index_input):
        table = layers.Embedding(row_count, 1,
                                 embeddings_initializer='zeros')
        return table(index_input)

    user_vec = latent_lookup(n_users, 'User_Embedding', user_in)
    item_vec = latent_lookup(n_items, 'Movie_Embedding', item_in)
    user_bias = bias_lookup(n_users, user_in)
    item_bias = bias_lookup(n_items, item_in)

    # Drop the length-1 sequence axis produced by the lookups.
    user_vec = layers.Flatten()(user_vec)
    user_bias = layers.Flatten()(user_bias)
    item_vec = layers.Flatten()(item_vec)
    item_bias = layers.Flatten()(item_bias)

    rating = layers.Dot(name='Dot', axes=1)([user_vec, item_vec])
    rating = layers.Add(name='Bias')([rating, user_bias, item_bias])

    model = models.Model(inputs=[user_in, item_in], outputs=rating)
    model.summary()
    model.compile(loss='mse', optimizer='adam')
    plot_model(model,
               to_file='tmp/model.png',
               show_shapes=True,
               show_layer_names=True)
    return model
def get_fd_particle_type_sub_graph(self, s, P_transpose, R, T):
    """Build the sum of ``T`` discounted reward terms as a Keras sub-graph.

    At step ``i`` the reward term is ``sum(s * R, axis=-1, keepdims=True)``
    scaled by ``self.gamma ** i``; ``s`` is then replaced by
    ``Dot(axes=-1)([P_transpose, s])`` for the next step.

    Args:
        s: starting state tensor.
        P_transpose: tensor contracted with ``s`` (last axes) to advance
            one step. Presumably a transposed transition matrix - confirm
            against the caller.
        R: tensor multiplied element-wise with ``s`` to form the reward.
        T: number of steps to unroll.

    Returns:
        The ``Add`` of the ``T`` discounted reward tensors.
    """
    vs = []
    for i in range(T):
        r = kl.Lambda(
            lambda z: K.sum(z[0] * z[1], axis=-1, keepdims=True))([s, R])
        # Bind the step index as a default argument. A plain closure over
        # ``i`` is late-binding: if Keras invokes the lambda again after
        # this loop has finished (re-tracing, cloning, reloading), every
        # layer would see the final value of ``i`` and apply the same
        # discount.
        v = kl.Lambda(lambda z, step=i: (self.gamma**step) * z)(r)
        vs.append(v)
        s = kl.Dot(axes=-1)([P_transpose, s])
    v = kl.Add()(vs)
    return v
def get_score(self,user,item, artist=None):
    """Score a user/item pair (optionally with its artist).

    The flattened user and item factors are multiplied element-wise; when
    ``self.add_dot`` is set their dot product is appended. When
    ``self.include_artist`` is set the same interaction is computed for
    the user-artist factors and concatenated. The resulting vector is
    passed to ``self.fff`` and its output returned.
    """

    def interact(first_table, first_ids, second_table, second_ids):
        # Element-wise product of two flattened lookups, optionally
        # followed by their dot product.
        first = kl.Flatten()(first_table(first_ids))
        second = kl.Flatten()(second_table(second_ids))
        combined = kl.Multiply()([first, second])
        if self.add_dot:
            dot = kl.Dot(1)([first, second])
            combined = kl.Concatenate()([combined, dot])
        return combined

    mf_vector = interact(self.emb_user_mf, user, self.emb_item_mf, item)

    if self.include_artist:
        artist_part = interact(self.emb_user_artist_mf, user,
                               self.emb_artist_mf, artist)
        mf_vector = kl.Concatenate()([mf_vector, artist_part])

    return self.fff(mf_vector)
def create_discriminator():
    """Build and compile the conditional image discriminator.

    The model takes a ``(28, 28)`` image and a 10-element vector. The
    image goes through a convolutional stack ending in 10 sigmoid units,
    and the model output is the dot product of those units with the
    10-element input.

    Returns:
        The model compiled with SGD (lr 0.04, momentum 0.9) and binary
        cross-entropy; its summary is printed as a side effect.
    """
    # The conditioning vector is created first, as in the original graph.
    label_input = layers.Input(shape=(10, ))
    image_input = layers.Input(shape=(28, 28))

    def conv(filters, kernel_size):
        return layers.Conv2D(filters=filters,
                             kernel_size=kernel_size,
                             activation='relu',
                             padding='same')

    def pool():
        return layers.MaxPooling2D(pool_size=(3, 3),
                                   strides=None,
                                   padding='valid',
                                   data_format=None)

    stack = [
        layers.Reshape((28, 28, 1)),
        conv(40, (8, 8)),
        pool(),
        layers.Dropout(0.5),
        conv(60, (5, 5)),
        pool(),
        layers.Dropout(0.5),
        conv(80, (3, 3)),
        conv(100, (3, 3)),
        pool(),
        layers.Flatten(),
        layers.Dropout(0.5),
        layers.Dense(100),
        layers.LeakyReLU(alpha=0.2),
        layers.Dropout(0.3),
        layers.Dense(100),
        layers.LeakyReLU(alpha=0.2),
        layers.Dense(10, activation="sigmoid"),
    ]

    features = image_input
    for layer in stack:
        features = layer(features)

    # Pick out / weight the per-class scores with the conditioning vector.
    output = layers.Dot(-1)([features, label_input])

    model = Model(inputs=[image_input, label_input], outputs=output)
    model.compile(optimizer=SGD(lr=0.04, momentum=0.9),
                  loss='binary_crossentropy')
    model.summary()
    return model
def get_bs_particle_graphs(self, s, Dense_W, V, R, temp):
    """Assemble the sub-graph that turns state ``s`` into ``P``, ``V_`` and ``v``.

    Args:
        s: input state tensor.
        Dense_W: an existing Dense layer; it is applied to ``s`` and then
            frozen (``trainable = False``) inside this method.
        V: tensor contracted with ``P`` over the last axis.
        R: tensor contracted with ``s`` over the last axis.
        temp: divisor applied to the ``E`` logits before the sigmoid.

    Returns:
        Tuple ``(W, W_, E, P, V_, v)`` where ``W`` / ``W_`` are the first
        weights of ``Dense_W`` and of a new Dense layer, ``E`` is the
        masked sigmoid matrix, ``P`` the sum-normalized logits, ``V_`` the
        dot of ``P`` with ``V`` and ``v = r + self.gamma * V_``.
    """
    # Apply the layer once, presumably so its weights exist before they
    # are read on the next line.
    Dense_W(s)
    W = Dense_W.weights[0]
    # NOTE(review): `W_constraint` is the Keras 1 spelling of
    # `kernel_constraint` - confirm the targeted Keras version accepts it.
    Dense_W_ = kl.Dense(self.num_states, use_bias=False, W_constraint=nonneg())
    Dense_W_(s)
    W_ = Dense_W_.weights[0]
    s_embed = kl.Dense(self.num_states, activation='tanh')(s)
    Dense_E = kl.Dense(self.num_states * self.num_states)
    # First column of s_embed multiplied by zero: the E branch below is fed
    # an all-zero input, so its output does not vary with s.
    null_input = kl.Lambda(lambda z: 0 * z[:, 0:1])(s_embed)
    E_logit = kl.Dense(self.num_states, activation='tanh')(null_input)
    E_logit = Dense_E(E_logit)
    # Scale the logits by 1/temp before the sigmoid.
    E_logit = kl.Lambda(lambda z: z / temp)(E_logit)
    E = kl.Activation('sigmoid')(E_logit)
    E = kl.Reshape((self.num_states, self.num_states))(E)
    # Element-wise multiplication with self.mask.
    E = kl.Lambda(lambda z: self.mask * z)(E)
    # Freeze the externally supplied layer before reusing it for logit_1.
    Dense_W.trainable = False
    logit_1 = Dense_W(s)
    # Scale E element-wise by the second kernel, then contract with s.
    logit_2 = kl.Lambda(lambda z: W_ * (z))(E)
    logit_2 = kl.Dot(axes=1)([logit_2, s])
    logit = kl.Add()([logit_1, logit_2])
    #P = kl.Activation('softmax')(logit)
    # Normalize by the plain sum over the last axis (not a softmax).
    P = kl.Lambda(lambda z: z / K.sum(z, axis=-1, keepdims=True))(logit)
    V_ = kl.Dot(axes=-1)([P, V])
    r = kl.Dot(axes=-1)([s, R])
    v = kl.Lambda(lambda z: z[0] + self.gamma * z[1])([r, V_])
    return W, W_, E, P, V_, v
def build_q_model(self, hidden_layer_sizes=(40, 40)):
    """Build the Q model.

    Args:
        hidden_layer_sizes: widths of the ReLU hidden layers, in order.

    Returns:
        (model_train, model_predict): the training model takes the state
        and an action mask and outputs their contracted Q value (compiled
        with MSE and Adam); the prediction model takes only the state and
        outputs one value per action. Both share the same layers.
    """
    state_in = layers.Input(shape=(self.state_dim, ))

    hidden = state_in
    for width in hidden_layer_sizes:
        hidden = layers.Dense(width, activation='relu')(hidden)
    q_values = layers.Dense(self.action_dim)(hidden)

    # Only the output of the action actually taken should be fitted, so
    # the predicted outputs are contracted with a mask that is supplied
    # as an extra input during training.
    mask_in = layers.Input(shape=(self.action_dim, ))
    masked_q = layers.Dot(axes=1)([q_values, mask_in])

    model_train = Model(inputs=[state_in, mask_in], outputs=masked_q)
    model_train.compile(loss='mse', optimizer=Adam())

    model_predict = Model(inputs=state_in, outputs=q_values)
    return model_train, model_predict
def fit(self, Y, T, X):
    """Fit the outcome model ``Y ~ h(X) . T``.

    Parameters
    ----------
    Y : outcome
    T : treatment
    X : features

    The trailing shapes of ``T`` and ``Y`` are stored on ``self.d_t`` and
    ``self.d_y``; ``self.theta`` is set to a model mapping ``X`` to
    ``self._h(X)``. ``optimizer`` and ``training_options`` are taken from
    the enclosing scope, which is not visible in this block.

    Returns
    -------
    self
    """
    x_shape, t_shape, y_shape = (np.shape(arr)[1:] for arr in (X, T, Y))
    # keep track in case we need to reshape output by dropping singleton
    # dimensions
    self.d_t = t_shape
    self.d_y = y_shape

    def first_dim(shape):
        # An empty trailing shape (1-D data) counts as a single column.
        return shape[0] if shape else 1

    d_x = first_dim(x_shape)
    d_t = first_dim(t_shape)
    d_y = first_dim(y_shape)

    x_in = L.Input((d_x,))
    t_in = L.Input((d_t,))

    # reshape in case we get fewer dimensions than expected from h
    # (e.g. a scalar)
    h_out = L.Reshape((d_y, d_t))(self._h(x_in))
    y_out = L.Dot([2, 1])([h_out, t_in])

    self.theta = Model([x_in], self._h(x_in))

    model = Model([x_in, t_in], y_out)
    model.compile(optimizer, loss='mse')
    model.fit([X, T], Y, **training_options)
    return self
def build_model(user_n, movie_n, latent_dim):
    """Build and compile a biased matrix-factorization rating model.

    Args:
        user_n: number of rows of the user tables.
        movie_n: number of rows of the movie tables.
        latent_dim: width of the user and movie latent vectors.

    Returns:
        A model taking a user index and a movie index. The dot product of
        their latent vectors plus both bias terms is passed through a
        final ``Dense(1)``. Compiled with MSE, adamax and the ``rmse``
        metric; the summary is printed as a side effect.
    """
    print('Building model')

    def lookup(index_input, row_count, width):
        # Table lookup with the length-1 sequence axis flattened away.
        looked_up = layers.Embedding(row_count, width)(index_input)
        return layers.Flatten()(looked_up)

    user_input = layers.Input(shape=[1])
    user_vec = lookup(user_input, user_n, latent_dim)

    movie_input = layers.Input(shape=[1])
    movie_vec = lookup(movie_input, movie_n, latent_dim)

    user_bias = lookup(user_input, user_n, 1)
    movie_bias = lookup(movie_input, movie_n, 1)

    interaction = layers.Dot(axes=1)([user_vec, movie_vec])
    rating = layers.Add()([interaction, user_bias, movie_bias])
    rating = layers.Dense(1)(rating)

    model = Model(inputs=[user_input, movie_input], outputs=[rating])
    model.compile(loss='mse', optimizer="adamax", metrics=[rmse])
    model.summary()
    return model
def build_model(char_size=27,
                dim=64,
                iterations=4,
                training=True,
                ilp=False,
                pca=False):
    """Build the iterative rule-attention model on frozen GloVe embeddings.

    Args:
        char_size: not used by this implementation (kept for interface
            compatibility).
        dim: width of the GRU encoders and of the reasoning state.
        iterations: number of attention / unification rounds.
        training: when true (and neither ``ilp`` nor ``pca``), return a
            compiled model that outputs only the final prediction.
        ilp: falsy, or a ``(context, query, templates)`` triple of tensors
            that replaces the inputs and prepends ``templates`` to the
            embedded context.
        pca: when true (and ``ilp`` is falsy), return a model that outputs
            the embedded rules.

    Returns:
        ``(outs, out)`` when ``ilp`` is given, where ``outs`` is the list
        of per-iteration attention tensors and ``out`` the prediction
        tensor; otherwise a Keras ``Model`` as described above (with
        ``training`` false the model outputs ``outs + [out]``).

    Side effects:
        Reads ``data/glove/glove.6B.100d.txt`` under the current working
        directory and prints corpus / vocabulary statistics.
    """
    # Inputs
    # Context: (rules, preds, chars,)
    context = L.Input(shape=(
        None,
        None,
        None,
    ),
                      name='context',
                      dtype='int32')
    query = L.Input(shape=(None, ), name='query', dtype='int32')

    if ilp:
        context, query, templates = ilp

    print('Found %s texts.' % len(CONTEXT_TEXTS))
    word_index = WORD_INDEX
    print('Found %s unique tokens.' % len(word_index))

    embeddings_index = {}
    GLOVE_DIR = os.path.abspath('.') + "/data/glove"
    # The context manager closes the file even if a line fails to parse.
    with open(os.path.join(GLOVE_DIR, 'glove.6B.100d.txt'),
              'r',
              encoding='utf-8') as f:
        for line in f:
            values = line.split()
            word = values[0]
            coefs = np.asarray(values[1:], dtype='float32')
            embeddings_index[word] = coefs
    print('Found %s word vectors.' % len(embeddings_index))

    EMBEDDING_DIM = 100
    embedding_matrix = np.zeros((len(word_index) + 1, EMBEDDING_DIM))
    for word, i in word_index.items():
        embedding_vector = embeddings_index.get(word)
        if embedding_vector is not None:
            # words not found in embedding index will be all-zeros.
            embedding_matrix[i] = embedding_vector

    # Contextual embedding of symbols, using the frozen GloVe table.
    embedding_layer = L.Embedding(len(word_index) + 1,
                                  EMBEDDING_DIM,
                                  weights=[embedding_matrix],
                                  trainable=False)
    embedded_ctx = embedding_layer(
        context)  # (?, rules, preds, chars, EMBEDDING_DIM)
    embedded_q = embedding_layer(query)  # (?, chars, EMBEDDING_DIM)

    if ilp:
        # Combine the templates with the context,
        # (?, rules+temps, preds, chars, EMBEDDING_DIM)
        embedded_ctx = L.Lambda(lambda xs: K.concatenate(xs, axis=1),
                                name='template_concat')(
                                    [templates, embedded_ctx])

    embed_pred = ZeroGRU(dim, go_backwards=True, name='embed_pred')
    embedded_predq = embed_pred(embedded_q)  # (?, dim)
    # For every rule, for every predicate, embed the predicate
    embedded_ctx_preds = NestedTimeDist(NestedTimeDist(embed_pred,
                                                       name='nest1'),
                                        name='nest2')(embedded_ctx)
    # (?, rules, preds, dim)
    embed_rule = ZeroGRU(dim, name='embed_rule')
    embedded_rules = NestedTimeDist(embed_rule,
                                    name='d_embed_rule')(embedded_ctx_preds)
    # (?, rules, dim)

    # Reused layers over iterations
    repeat_toctx = L.RepeatVector(K.shape(embedded_ctx)[1],
                                  name='repeat_to_ctx')
    diff_sq = L.Lambda(lambda xy: K.square(xy[0] - xy[1]),
                       output_shape=(None, dim),
                       name='diff_sq')
    mult = L.Multiply()
    concat = L.Lambda(lambda xs: K.concatenate(xs, axis=2),
                      output_shape=(None, dim * 5),
                      name='concat')
    att_densel = L.Dense(dim // 2, activation='tanh', name='att_densel')
    att_dense = L.Dense(1, name='att_dense')
    squeeze2 = L.Lambda(lambda x: K.squeeze(x, 2), name='sequeeze2')
    softmax1 = L.Softmax(axis=1)
    unifier = NestedTimeDist(ZeroGRU(dim, go_backwards=False,
                                     name='unifier'),
                             name='dist_unifier')
    dot11 = L.Dot((1, 1))

    # Reasoning iterations
    state = embedded_predq
    repeated_q = repeat_toctx(embedded_predq)
    outs = list()
    for _ in range(iterations):
        # Compute attention between rule and query state
        ctx_state = repeat_toctx(state)  # (?, rules, dim)
        s_s_c = diff_sq([ctx_state, embedded_rules])
        s_m_c = mult([embedded_rules, state])  # (?, rules, dim)
        sim_vec = concat(
            [s_s_c, s_m_c, ctx_state, embedded_rules, repeated_q])
        sim_vec = att_densel(sim_vec)  # (?, rules, dim//2)
        sim_vec = att_dense(sim_vec)  # (?, rules, 1)
        sim_vec = squeeze2(sim_vec)  # (?, rules)
        sim_vec = softmax1(sim_vec)
        outs.append(sim_vec)

        # Unify every rule and weighted sum based on attention
        new_states = unifier(embedded_ctx_preds, initial_state=[state])
        # (?, rules, dim)
        state = dot11([sim_vec, new_states])

    # Prediction
    out = L.Dense(1, activation='sigmoid', name='out')(state)
    if ilp:
        return outs, out
    elif pca:
        model = Model([context, query], [embedded_rules])
    elif training:
        model = Model([context, query], [out])
        model.compile(loss='binary_crossentropy',
                      optimizer='adam',
                      metrics=['acc'])
    else:
        model = Model([context, query], outs + [out])
    return model
def build(self, input_shape):
    """Create the layer's own weights and build its helper sub-layers.

    Adds the ``mu``, ``sigma`` and ``alpha`` weights (shape
    ``(1, 1, 1, self.N)``), builds five 1x1 convolutions, a batch
    normalization and two ``Dot`` layers, then appends the trainable
    weights of the convolutions and of the batch normalization to this
    layer's ``_trainable_weights``.

    Args:
        input_shape: shape of the layer input; printed as a side effect.
    """
    print(input_shape)

    def make_param(label, initializer, constraint):
        return self.add_weight(shape=(1, 1, 1, self.N),
                               name=self.base_name + label,
                               initializer=initializer,
                               constraint=constraint,
                               trainable=True)

    def make_conv(filters, label, build_shape):
        # 1x1 convolution, built immediately so its weights exist now.
        conv = KL.Conv2D(filters, (1, 1),
                         name=self.base_name + label,
                         padding='same',
                         use_bias=True)
        conv.build(build_shape)
        return conv

    self.mu = make_param('mu', self.mu_initializer, self.mu_constraint)
    self.sigma = make_param('sigma', self.sigma_initializer,
                            self.sigma_constraint)
    self.alpha = make_param('alpha', self.alpha_initializer,
                            self.alpha_constraint)

    self.conv_theta = make_conv(self.intermediate_dim, 'conv_theta',
                                input_shape)
    self.conv_phi = make_conv(self.intermediate_dim, 'conv_phi',
                              input_shape)
    self.conv_delta = make_conv(self.N, 'conv_delta', input_shape)
    self.conv_g = make_conv(self.intermediate_dim, 'conv_g', input_shape)
    # conv_y consumes the intermediate representation, not the raw input.
    self.conv_y = make_conv(
        self.channels, 'conv_y',
        (input_shape[0], input_shape[1], input_shape[2],
         self.intermediate_dim))

    self.bn_y = KL.BatchNormalization(name=self.base_name + 'bn_y',
                                      gamma_initializer='zeros')
    self.bn_y.build(input_shape)

    batch = input_shape[0]
    positions = self.dim1 * self.dim2

    self.mat_mul_1 = KL.Dot(axes=2, name=self.base_name + 'mat_mul_1')
    self.mat_mul_1.build([
        (batch, positions, self.intermediate_dim),
        (batch, positions, self.intermediate_dim),
    ])

    self.mat_mul_2 = KL.Dot(axes=[2, 1], name=self.base_name + 'mat_mul_2')
    self.mat_mul_2.build([
        (batch, positions, positions),
        (batch, positions, self.intermediate_dim),
    ])

    # Register the sub-layers' trainable weights with this layer.
    self._trainable_weights += (self.conv_theta.trainable_weights +
                                self.conv_phi.trainable_weights +
                                self.conv_delta.trainable_weights +
                                self.conv_g.trainable_weights +
                                self.conv_y.trainable_weights +
                                self.bn_y.trainable_weights)

    super(Contextual_Attention, self).build(input_shape)
def AID_CreateModel(input_shape,
                    alpha_hinge=0.2,
                    Spatial_Dropout=False,
                    BN=True,
                    B5_FC1_neurons=1024,
                    similarity='simCos',
                    desc_dim=128,
                    desc_between_0_1=False,
                    BigDesc=False,
                    verbose=True):
    """Build the descriptor network, a similarity model and a train model.

    Args:
        input_shape: shape of one input patch.
        alpha_hinge: margin of the hinge loss (``'simCos'`` only).
        Spatial_Dropout: add a SpatialDropout2D (rate 0.5) in block 4.
        BN: add batch normalization after the convolutions.
        B5_FC1_neurons: width of the first dense layer of block 5; values
            ``<= 0`` skip that layer.
        similarity: ``'simCos'`` (cosine similarity, triplet hinge loss)
            or one of ``'simFC_concat'``, ``'simFC_concat_BigDesc'``,
            ``'simFC_diff'`` (dense similarity network trained with
            binary cross-entropy against a ground-truth input).
        desc_dim: descriptor width (output of block 5).
        desc_between_0_1: use a sigmoid on the descriptor layer.
        BigDesc: boolean; when true the descriptor is the flattened
            block-4 convolution output (no final BN/ReLU, no dense
            layers).
        verbose: print the summaries of the three models.

    Returns:
        ``(train_model, sim_type)`` where ``sim_type`` is ``'inlist'``,
        ``'concat'`` or ``'diff'``.

    Raises:
        ValueError: if ``similarity`` is not one of the supported values
            (previously this surfaced later as an unbound-variable error).
    """
    fc_sim_types = {
        'simFC_concat': 'concat',
        'simFC_concat_BigDesc': 'concat',
        'simFC_diff': 'diff',
    }
    # Fail fast, before any layer is created.
    if similarity != 'simCos' and similarity not in fc_sim_types:
        raise ValueError("Unsupported similarity %r; expected 'simCos' or "
                         "one of %s" % (similarity, sorted(fc_sim_types)))

    def conv_unit(tensor, filters, block, index):
        # 3x3 convolution -> optional batch norm -> ReLU, named after the
        # block and position so weights stay addressable by name.
        tensor = layers.Conv2D(filters, (3, 3),
                               padding='same',
                               name='block%d_conv%d' %
                               (block, index))(tensor)
        if BN:
            tensor = layers.BatchNormalization(name='block%d_BN%d' %
                                               (block, index))(tensor)
        return layers.Activation('relu',
                                 name='block%d_relu%d' %
                                 (block, index))(tensor)

    # descriptor model
    in_desc = layers.Input(shape=input_shape, name='input_patches')
    x = in_desc

    # Blocks 1-3: two conv units followed by 2x2 max pooling.
    for block, filters in ((1, 64), (2, 64), (3, 128)):
        x = conv_unit(x, filters, block, 1)
        x = conv_unit(x, filters, block, 2)
        x = layers.MaxPooling2D((2, 2),
                                strides=(2, 2),
                                name='block%d_pool' % block)(x)

    # Block 4: the second convolution keeps its raw output for BigDesc.
    x = conv_unit(x, 128, 4, 1)
    x = layers.Conv2D(128, (3, 3), padding='same', name='block4_conv2')(x)
    if not BigDesc and BN:
        x = layers.BatchNormalization(name='block4_BN2')(x)
    if Spatial_Dropout:
        # Rate passed positionally: the keyword was `p` in Keras 1 and is
        # `rate` in Keras 2, the position is the same in both.
        x = layers.SpatialDropout2D(0.5, name='block4_Dropout1')(x)
    if not BigDesc:
        x = layers.Activation('relu', name='block4_relu2')(x)

    # Block 5
    x = layers.Flatten(name='block5_flatten1')(x)
    if not BigDesc:
        if B5_FC1_neurons > 0:
            x = layers.Dense(B5_FC1_neurons,
                             activation='relu',
                             name='block5_FC1')(x)
        if desc_between_0_1:
            x = layers.Dense(desc_dim,
                             activation='sigmoid',
                             name='block5_FC2')(x)
        else:
            x = layers.Dense(desc_dim, name='block5_FC2')(x)

    desc_model = Model(in_desc, x, name='aff_desc')

    # similarity model
    if similarity in fc_sim_types:
        sim_type = fc_sim_types[similarity]
        if sim_type == 'concat':
            desc_dim = 2 * desc_model.output_shape[1]

        # 2 siamese network
        in_desc1 = layers.Input(shape=input_shape, name='input_patches1')
        in_desc2 = layers.Input(shape=input_shape, name='input_patches2')
        emb_1 = desc_model(in_desc1)
        emb_2 = desc_model(in_desc2)

        # Similarity model
        in_sim = layers.Input(shape=(desc_dim, ), name='input_diff_desc')
        x = layers.Dense(64, activation='relu', name='block1_FC1')(in_sim)
        x = layers.Dense(32, activation='relu', name='block1_FC2')(x)
        x = layers.Dense(1, activation='sigmoid', name='block1_FC3')(x)
        sim_model = Model(in_sim, x, name='sim')

        if sim_type == 'concat':
            x = layers.Concatenate(name='Concat')([emb_1, emb_2])
        else:
            x = layers.Subtract(name='Subtract')([emb_1, emb_2])

        out_net = sim_model(x)

        # Groundtruth Model
        in_GT = layers.Input(shape=(1, ), name='input_GroundTruth')
        GT_model = Model(in_GT, in_GT, name='GroundTruth')
        out_GT = GT_model(in_GT)

        class TopLossLayerClass(layers.Layer):
            """Adds the binary cross-entropy between s and t as a loss."""

            def __init__(self, **kwargs):
                super(TopLossLayerClass, self).__init__(**kwargs)

            def call(self, inputs):
                s, t = inputs  # t=1 -> Positive class, t=0 -> Negative class
                # Cross-entropy is the NEGATIVE log-likelihood. The loss
                # registered with add_loss is minimized, so without the
                # sign flip training would push predictions away from
                # the labels.
                loss = -K.sum(t * K.log(s) + (1 - t) * K.log(1 - s))
                self.add_loss(loss)
                return loss

        TopLossLayer_obj = TopLossLayerClass(name='TopLossLayer')
        TopLossLayer = TopLossLayer_obj([out_net, out_GT])
        train_model = Model([in_desc1, in_desc2, in_GT],
                            TopLossLayer,
                            name='TrainModel')
    else:  # similarity == 'simCos': hinge loss
        # Similarity model
        desc_dim = desc_model.output_shape[1]
        in_sim1 = layers.Input(shape=(desc_dim, ), name='input_desc1')
        in_sim2 = layers.Input(shape=(desc_dim, ), name='input_desc2')
        x = layers.Dot(axes=1, normalize=True,
                       name='CosineProximity')([in_sim1,
                                                in_sim2])  # cosine proximity
        sim_model = Model([in_sim1, in_sim2], x, name='sim')

        # 3 siamese networks
        in_desc1 = layers.Input(shape=input_shape,
                                name='input_patches_anchor')
        in_desc2 = layers.Input(shape=input_shape,
                                name='input_patches_positive')
        in_desc3 = layers.Input(shape=input_shape,
                                name='input_patches_negative')
        emb_1 = desc_model(in_desc1)
        emb_2 = desc_model(in_desc2)
        emb_3 = desc_model(in_desc3)
        sim_type = 'inlist'
        out_net_positive = sim_model([emb_1, emb_2])
        out_net_negative = sim_model([emb_1, emb_3])

        class TopLossLayerClass(layers.Layer):
            """Adds the triplet hinge loss with margin ``alpha``."""

            def __init__(self, alpha=0.2, **kwargs):
                self.alpha = alpha
                super(TopLossLayerClass, self).__init__(**kwargs)

            def call(self, inputs):
                out_net_positive, out_net_negative = inputs
                # Hinge loss computation
                loss = K.sum(
                    K.maximum(
                        out_net_negative - out_net_positive + self.alpha,
                        0))
                self.add_loss(loss)
                return loss

        TopLossLayer_obj = TopLossLayerClass(name='TopLossLayer',
                                             alpha=alpha_hinge)
        TopLossLayer = TopLossLayer_obj([out_net_positive, out_net_negative])
        train_model = Model([in_desc1, in_desc2, in_desc3],
                            TopLossLayer,
                            name='TrainModel')

    if verbose:
        print(
            '\n\n-------> The network architecture for the affine descriptor computation !'
        )
        desc_model.summary()
        print(
            '\n\n-------> The network architecture for the similarity computation !'
        )
        sim_model.summary()
        print('\n\n-------> Train model connections')
        train_model.summary()

    return train_model, sim_type
def k_func(x, y):
    """Return the un-normalized dot product of ``x`` and ``y`` over axis 1 of both."""
    contraction = klayers.Dot(axes=[1, 1], normalize=False)
    return contraction([x, y])
def k_func(x, y):
    """Return the normalized dot product of axis 1 of ``x`` with axis 2 of ``y``."""
    contraction = klayers.Dot(axes=[1, 2], normalize=True)
    return contraction([x, y])
def build_birnn_multifeature_coattention_model(voca_dim,
                                               time_steps,
                                               num_feature_channels,
                                               num_features,
                                               feature_dim,
                                               output_dim,
                                               model_dim,
                                               atten_dim,
                                               mlp_dim,
                                               item_embedding=None,
                                               rnn_depth=1,
                                               mlp_depth=1,
                                               drop_out=0.5,
                                               rnn_drop_out=0.,
                                               rnn_state_drop_out=0.,
                                               trainable_embedding=False,
                                               gpu=False,
                                               return_customized_layers=False):
    """
    Create a bidirectional-RNN model with multi-channel co-attention.

    The sequence input is (optionally embedded and) encoded by stacked
    bidirectional LSTMs. Every feature-channel input is mapped to
    ``model_dim`` by one shared dense layer and combined with the encoded
    sequence through ``_coatten_compare_aggregate``. All results are
    concatenated and classified by an MLP with a softmax output.

    :param voca_dim: vocabulary dimension size.
    :param time_steps: the length of the sequence input
    :param num_feature_channels: the number of feature inputs (attention
        channels); can be used to mimic multi-head attention
    :param num_features: first dimension of every feature input
    :param feature_dim: second dimension of every feature input
    :param output_dim: the output dimension size
    :param model_dim: rnn dimension size; an odd value is increased by
        one so it can be split across the two directions
    :param atten_dim: not used by this implementation
    :param mlp_dim: the dimension size of the fully connected layers
    :param item_embedding: integer, numpy 2D array, or None (default=None)
        If an integer, a randomly initialized embedding of that width is
        applied to the input. If a matrix, it is used as the embedding
        matrix. If None, the input is fed to the RNN directly.
    :param rnn_depth: number of stacked bidirectional layers
    :param mlp_depth: depth of the MLP, counting the softmax layer
    :param drop_out: dropout rate of the fully connected layers
    :param rnn_drop_out: dropout rate of the rnn layers
    :param rnn_state_drop_out: dropout rate of the rnn state tensor
        (only used when ``gpu`` is False)
    :param trainable_embedding: boolean
    :param gpu: boolean, default=False
        If True, CuDNNLSTM is used instead of LSTM for the RNN layers.
    :param return_customized_layers: boolean, default=False
        If True, return the model and a dictionary of custom layer
        classes, otherwise return the model only
    :return: keras model (or ``(model, custom_objects)``)
    """
    if model_dim % 2 == 1:
        model_dim += 1

    # --- sequence input, optionally embedded ---
    if item_embedding is None:
        inputs = models.Input(shape=(time_steps, voca_dim),
                              dtype='float32',
                              name='input0')
        x1 = inputs
    else:
        inputs = models.Input(shape=(time_steps, ),
                              dtype='int32',
                              name='input0')
        if isinstance(item_embedding, np.ndarray):
            assert voca_dim == item_embedding.shape[0]
            embedding = layers.Embedding(voca_dim,
                                         item_embedding.shape[1],
                                         input_length=time_steps,
                                         weights=[
                                             item_embedding,
                                         ],
                                         trainable=trainable_embedding,
                                         mask_zero=False,
                                         name='embedding_layer0')
        elif utils.is_integer(item_embedding):
            embedding = layers.Embedding(voca_dim,
                                         item_embedding,
                                         input_length=time_steps,
                                         trainable=trainable_embedding,
                                         mask_zero=False,
                                         name='embedding_layer0')
        else:
            raise ValueError(
                "item_embedding must be either integer or numpy matrix")
        x1 = embedding(inputs)

    # --- feature-channel inputs, mapped by one shared dense layer ---
    inputs1 = [
        models.Input(shape=(num_features, feature_dim),
                     dtype='float32',
                     name='input1' + str(channel))
        for channel in range(num_feature_channels)
    ]
    feature_map_layer = layers.TimeDistributed(layers.Dense(
        model_dim, name="feature_map_layer", activation="sigmoid"),
                                               name="td_feature_map_layer")
    x2s = [feature_map_layer(feature_input) for feature_input in inputs1]

    # --- rnn encoding ---
    half_dim = int(model_dim / 2)
    for depth in range(rnn_depth):
        if gpu:
            cell = layers.CuDNNLSTM(half_dim, return_sequences=True)
        else:
            cell = layers.LSTM(half_dim,
                               return_sequences=True,
                               dropout=rnn_drop_out,
                               recurrent_dropout=rnn_state_drop_out)
        x1 = layers.Bidirectional(cell,
                                  name='bi_lstm_layer' + str(depth))(x1)
        x1 = layers.BatchNormalization(name='rnn_batch_norm_layer' +
                                       str(depth))(x1)
        if gpu:
            # CuDNNLSTM is created without dropout arguments here, so the
            # rnn dropout is applied as a separate layer.
            x1 = layers.Dropout(rnn_drop_out,
                                name="rnn_dropout_layer" + str(depth))(x1)

    # --- co-attention layers shared by all feature channels ---
    coatten_layer = clayers.CoAttentionWeight(
        name="coattention_weights_layer")
    featnorm_layer1 = clayers.FeatureNormalization(
        name="normalized_coattention_weights_layer1", axis=1)
    featnorm_layer2 = clayers.FeatureNormalization(
        name="normalized_coattention_weights_layer2", axis=2)
    focus_layer1 = layers.Dot((1, 1), name="focus_layer1")
    focus_layer2 = layers.Dot((2, 1), name="focus_layer2")
    pair_layer1 = layers.Concatenate(axis=-1, name="pair_layer1")
    pair_layer2 = layers.Concatenate(axis=-1, name="pair_layer2")
    compare_layer1 = layers.TimeDistributed(layers.Dense(model_dim,
                                                         activation="relu"),
                                            name="compare_layer1")
    compare_layer2 = layers.TimeDistributed(layers.Dense(model_dim,
                                                         activation="relu"),
                                            name="compare_layer2")
    flatten_layer = layers.Flatten(name="flatten_layer")

    xs = []
    for x2_ in x2s:
        xs.extend(
            _coatten_compare_aggregate(coatten_layer, featnorm_layer1,
                                       featnorm_layer2, focus_layer1,
                                       focus_layer2, pair_layer1,
                                       pair_layer2, compare_layer1,
                                       compare_layer2, flatten_layer, x1,
                                       x2_))

    x = layers.Concatenate(axis=1, name="concat_feature_layer")(xs)

    # --- MLP layers (the softmax layer counts as the last one) ---
    for depth in range(mlp_depth - 1):
        x = layers.Dense(mlp_dim,
                         activation='selu',
                         kernel_initializer='lecun_normal',
                         name='selu_layer' + str(depth))(x)
        x = layers.AlphaDropout(drop_out,
                                name='alpha_layer' + str(depth))(x)

    outputs = layers.Dense(output_dim,
                           activation="softmax",
                           name="softmax_layer0")(x)

    model = models.Model([inputs] + inputs1, outputs)

    if not return_customized_layers:
        return model
    return model, {
        'CoAttentionWeight': clayers.CoAttentionWeight,
        "FeatureNormalization": clayers.FeatureNormalization
    }
def init_model(self, train, std=0.01):
    """Build the pairwise (BPR) training model and its scoring model.

    The scoring model has two branches that share the same inputs:

    * an MF branch: element-wise product and dot product of a user and an
      item embedding (plus, when ``self.include_artist`` is set, the same
      pair of features for a user/artist embedding pair);
    * an MLP branch: concatenated user/item(/artist) embeddings passed
      through one ReLU dense layer per entry of ``self.layers``.

    Both branches are concatenated and mapped to one linear score. The
    training model applies the scoring model to a positive and a negative
    item for the same user and adds ``K.mean(self.bpr(outputs))`` as loss.

    :param train: not read by this method; kept for interface compatibility
    :param std: not read by this method; kept for interface compatibility
    :return: tuple ``(model, predict_model)`` - the compiled training model
        and the shared scoring model
    :raises ValueError: if ``self.optimizer`` is not one of ``'adam'``,
        ``'adagrad'``, ``'adadelta'`` or ``'sgd'``
    """
    with_artist = self.include_artist

    item = kl.Input((1, ), dtype=self.intX)
    user = kl.Input((1, ), dtype=self.intX)
    if with_artist:
        artist = kl.Input((1, ), dtype=self.intX)

    emb_user_mf = Embedding(output_dim=self.factors,
                            input_dim=self.num_users,
                            embeddings_regularizer=l2(self.emb_reg))
    emb_user = Embedding(output_dim=self.factors,
                         input_dim=self.num_users,
                         embeddings_regularizer=l2(self.emb_reg))
    emb_item_mf = Embedding(output_dim=self.factors,
                            input_dim=self.num_items,
                            embeddings_regularizer=l2(self.emb_reg))
    emb_item = Embedding(output_dim=self.factors,
                         input_dim=self.num_items,
                         embeddings_regularizer=l2(self.emb_reg))
    if with_artist:
        # This table is looked up with user ids, so it needs one row per
        # user (it was previously sized by the number of artists).
        emb_user_artist_mf = Embedding(output_dim=self.factors,
                                       input_dim=self.num_users,
                                       embeddings_regularizer=l2(
                                           self.emb_reg))
        emb_artist_mf = Embedding(output_dim=self.factors,
                                  input_dim=self.num_artists,
                                  embeddings_regularizer=l2(self.emb_reg))
        emb_artist = Embedding(output_dim=self.factors,
                               input_dim=self.num_artists,
                               embeddings_regularizer=l2(self.emb_reg))

    # MF part
    uemb = kl.Flatten()(emb_user_mf(user))
    iemb = kl.Flatten()(emb_item_mf(item))
    mf_dot = kl.Dot(1)([uemb, iemb])
    mf_mul = kl.Multiply()([uemb, iemb])
    mf_vector = kl.Concatenate()([mf_mul, mf_dot])

    if with_artist:
        uemb = kl.Flatten()(emb_user_artist_mf(user))
        # The artist table must be indexed by the artist input (it was
        # previously indexed by item ids).
        aemb = kl.Flatten()(emb_artist_mf(artist))
        mf_dot = kl.Dot(1)([uemb, aemb])
        mf_mul = kl.Multiply()([uemb, aemb])
        mf_vector = kl.Concatenate()([mf_vector, mf_mul, mf_dot])

    # MLP part
    uemb = kl.Flatten()(emb_user(user))
    iemb = kl.Flatten()(emb_item(item))
    mlp_vector = kl.Concatenate()([uemb, iemb])
    if with_artist:
        emba = kl.Flatten()(emb_artist(artist))
        mlp_vector = kl.Concatenate()([mlp_vector, emba])

    for i, units in enumerate(self.layers):
        mlp_vector = kl.Dense(units,
                              activation='relu',
                              name="layer%d" % i,
                              kernel_regularizer=l2(
                                  self.layer_reg))(mlp_vector)

    # Prediction part: one linear score from both branches.
    comb = kl.Concatenate()([mf_vector, mlp_vector])
    res = kl.Dense(1,
                   activation='linear',
                   kernel_initializer='lecun_uniform',
                   kernel_regularizer=l2(self.layer_reg))(comb)

    inputs = [user, item]
    if with_artist:
        inputs += [artist]
    predict_model = km.Model(inputs, [res])

    # Training wrapper: score a positive and a negative sample with the
    # same (weight-sharing) scoring model.
    current_user = kl.Input((1, ), name="current_user")
    current_item_pos = kl.Input((1, ),
                                dtype=self.intX,
                                name="current_item_pos")
    current_item_neg = kl.Input((1, ),
                                dtype=self.intX,
                                name="current_item_neg")

    pred_from_pos = [current_user, current_item_pos]
    pred_from_neg = [current_user, current_item_neg]

    if with_artist:
        current_artist_pos = kl.Input((1, ), name="current_artist_pos")
        current_artist_neg = kl.Input((1, ), name="current_artist_neg")
        pred_from_neg += [current_artist_neg]
        pred_from_pos += [current_artist_pos]

    current_res_pos = predict_model(pred_from_pos)
    current_res_neg = predict_model(pred_from_neg)

    inputs = [current_user, current_item_pos, current_item_neg]
    if with_artist:
        inputs += [current_artist_pos, current_artist_neg]
    outputs = [current_res_pos, current_res_neg]

    model = km.Model(inputs, outputs)
    model.add_loss(K.mean(self.bpr(outputs)))

    if self.optimizer == 'adam':
        opt = keras.optimizers.Adam(lr=self.learning_rate)
    elif self.optimizer == 'adagrad':
        opt = keras.optimizers.Adagrad(lr=self.learning_rate)
    elif self.optimizer == 'adadelta':
        # Adadelta is run with a ten times larger learning rate.
        opt = keras.optimizers.Adadelta(lr=self.learning_rate * 10)
    elif self.optimizer == 'sgd':
        opt = keras.optimizers.SGD(lr=self.learning_rate)
    else:
        # Without this branch an unknown name surfaced as an unbound
        # local variable at compile time.
        raise ValueError("unsupported optimizer: %r" % (self.optimizer, ))

    model.compile(optimizer=opt)

    return model, predict_model
def build_model(char_size=27,
                dim=64,
                iterations=4,
                training=True,
                ilp=False,
                pca=False):
    """Build the iterative control/read/write reasoning model.

    Each iteration attends over the query characters (control unit),
    attends over the embedded rules (read unit) and updates a memory
    vector gated against the previous memory (write unit). The final
    prediction is a sigmoid over the query embedding concatenated with
    the last memory.

    :param char_size: size of the symbol vocabulary; used as both input
        and output dimension of the frozen one-hot embedding
    :param dim: width of the GRU embedding and of the dense layers
    :param iterations: number of reasoning iterations
    :param training: if True, return a compiled single-output model;
        otherwise return a model that also outputs every attention/gate
        tensor collected in ``outs``
    :param ilp: not read by this function body
    :param pca: not read by this function body
    :return: keras model taking ``[context, query]``
    """
    # Inputs
    # Context: (rules, preds, chars,)
    context = L.Input(shape=(None, None, None, ),
                      name='context',
                      dtype='int32')
    query = L.Input(shape=(None, ), name='query', dtype='int32')
    # Flatten preds to embed entire rules
    var_flat = L.Lambda(lambda x: K.reshape(
        x, K.stack([K.shape(x)[0], -1, K.prod(K.shape(x)[2:])])),
                        name='var_flat')
    flat_ctx = var_flat(context)  # (?, rules, preds*chars)
    # One-hot embedding of symbols
    onehot_weights = np.eye(char_size)
    onehot_weights[0, 0] = 0  # Clear zero index
    onehot = L.Embedding(char_size,
                         char_size,
                         trainable=False,
                         weights=[onehot_weights],
                         name='onehot')
    embedded_ctx = onehot(flat_ctx)  # (?, rules, preds*chars, char_size)
    embedded_q = onehot(query)  # (?, chars, char_size)
    # Embed predicates
    embed_pred = ZeroGRU(dim,
                         go_backwards=True,
                         return_sequences=True,
                         return_state=True,
                         name='embed_pred')
    # First use: both the per-character sequence and the final state.
    embedded_predqs, embedded_predq = embed_pred(embedded_q)  # (?, chars, dim)
    # The same layer object is reused below, so the flags are switched off
    # here to make the second use return only the final output. These two
    # assignments must stay between the two uses of embed_pred.
    embed_pred.return_sequences = False
    embed_pred.return_state = False
    # Embed every rule
    embedded_rules = L.TimeDistributed(embed_pred, name='rule_embed')(
        embedded_ctx)  # (?, rules, dim)
    # Reused layers over iterations
    concatm1 = L.Concatenate(name='concatm1')
    repeat_toqlen = L.RepeatVector(K.shape(embedded_q)[1],
                                   name='repeat_toqlen')
    mult_cqi = L.Multiply(name='mult_cqi')
    dense_cqi = L.Dense(dim, name='dense_cqi')
    dense_cais = L.Dense(1, name='dense_cais')
    squeeze2 = L.Lambda(lambda x: K.squeeze(x, 2), name='sequeeze2')
    softmax1 = L.Softmax(axis=1, name='softmax1')
    dot11 = L.Dot((1, 1), name='dot11')
    repeat_toctx = L.RepeatVector(K.shape(context)[1], name='repeat_toctx')
    memory_dense = L.Dense(dim, name='memory_dense')
    kb_dense = L.Dense(dim, name='kb_dense')
    mult_info = L.Multiply(name='mult_info')
    info_dense = L.Dense(dim, name='info_dense')
    mult_att_dense = L.Multiply(name='mult_att_dense')
    read_att_dense = L.Dense(1, name='read_att_dense')
    mem_info_dense = L.Dense(dim, name='mem_info_dense')
    stack1 = L.Lambda(lambda xs: K.stack(xs, 1),
                      output_shape=(None, dim),
                      name='stack1')
    mult_self_att = L.Multiply(name='mult_self_att')
    self_att_dense = L.Dense(1, name='self_att_dense')
    misa_dense = L.Dense(dim, use_bias=False, name='misa_dense')
    mi_info_dense = L.Dense(dim, name='mi_info_dense')
    add_mip = L.Lambda(lambda xy: xy[0] + xy[1], name='add_mip')
    control_gate = L.Dense(1, activation='sigmoid', name='control_gate')
    # gate * candidate + (1 - gate) * previous
    gate2 = L.Lambda(lambda xyg: xyg[2] * xyg[0] + (1 - xyg[2]) * xyg[1],
                     name='gate')
    # Init control and memory
    zeros_like = L.Lambda(K.zeros_like, name='zeros_like')
    memory = embedded_predq  # (?, dim)
    control = zeros_like(memory)  # (?, dim)
    # Histories of all past memories/controls, attended over in the
    # write unit.
    pmemories, pcontrols = [memory], [control]
    # Reasoning iterations
    outs = list()
    for i in range(iterations):
        # Control Unit
        # Unlike the shared layers above, this projection is created anew
        # (with its own name) on every iteration.
        qi = L.Dense(dim, name='qi' + str(i))(embedded_predq)  # (?, dim)
        cqi = dense_cqi(concatm1([control, qi]))  # (?, dim)
        cais = dense_cais(mult_cqi([repeat_toqlen(cqi),
                                    embedded_predqs]))  # (?, qlen, 1)
        cais = squeeze2(cais)  # (?, qlen)
        cais = softmax1(cais)  # (?, qlen)
        outs.append(cais)
        new_control = dot11([cais, embedded_predqs])  # (?, dim)
        # Read Unit
        info = mult_info(
            [repeat_toctx(memory_dense(memory)),
             kb_dense(embedded_rules)])  # (?, rules, dim)
        infop = info_dense(concatm1([info, embedded_rules]))  # (?, rules, dim)
        rai = read_att_dense(mult_att_dense([repeat_toctx(new_control),
                                             infop]))  # (?, rules, 1)
        rai = squeeze2(rai)  # (?, rules)
        rai = softmax1(rai)  # (?, rules)
        outs.append(rai)
        read = dot11([rai, embedded_rules])  # (?, dim)
        # Write Unit
        mi_info = mem_info_dense(concatm1([read, memory]))  # (?, dim)
        past_ctrls = stack1(pcontrols)  # (?, i+1, dim)
        sai = self_att_dense(
            mult_self_att([L.RepeatVector(i + 1)(new_control),
                           past_ctrls]))  # (?, i+1, 1)
        sai = squeeze2(sai)  # (?, i+1)
        sai = softmax1(sai)  # (?, i+1)
        outs.append(sai)
        past_mems = stack1(pmemories)  # (?, i+1, dim)
        misa = L.dot([sai, past_mems], (1, 1),
                     name='misa_' + str(i))  # (?, dim)
        mip = add_mip([misa_dense(misa), mi_info_dense(mi_info)])  # (?, dim)
        cip = control_gate(new_control)  # (?, 1)
        outs.append(cip)
        new_memory = gate2([mip, memory, cip])  # (?, dim)
        # Update state
        pcontrols.append(new_control)
        pmemories.append(new_memory)
        memory, control = new_memory, new_control
    # Output Unit
    out = L.Dense(1, activation='sigmoid',
                  name='out')(concatm1([embedded_predq, memory]))
    if training:
        model = Model([context, query], out)
        model.compile(loss='binary_crossentropy',
                      optimizer='adam',
                      metrics=['acc'])
    else:
        # Inspection model: every collected attention/gate, then the
        # prediction as the last output.
        model = Model([context, query], outs + [out])
    return model
# Sanity-check the shapes of the test tensors and plot the test labels.
print(x_test_sr.shape, y_test.shape, adjacency_test_near.shape,
      adjacency_test_middle.shape, adjacency_test_distant.shape)
plt.figure(figsize=(20, 10))
plt.plot(y_test, 'r')

################################################ Model: Multi-STGCnet-SR
# input
# One feature tensor shared by all branches, plus one (n, n) adjacency
# input per branch (near / middle / distant).
features = Input(shape=(n, pre_sr))
adjacency_near = Input(shape=(n, n))
adjacency_middle = Input(shape=(n, n))
adjacency_distant = Input(shape=(n, n))

# near
# GCN layer
# Dot(axes=1) contracts axis 1 of both inputs, i.e. it multiplies the
# features by the transposed adjacency (same as A.X only for symmetric A).
output_near_start = layers.Dot(axes=1)([adjacency_near, features])
output = layers.Dense(n, activation='relu')(output_near_start)
# GCN layer
output = layers.Dot(axes=1)([adjacency_near, output])
output = layers.Dense(n, activation='relu')(output)
# Swap the two non-batch axes before feeding the stacked LSTMs.
output = layers.Permute((2, 1))(output)
output = layers.LSTM(32, return_sequences=True)(output)
output = layers.LSTM(12, kernel_initializer='random_normal')(output)
output_near_end = layers.Dense(1,
                               activation='relu',
                               kernel_initializer='random_normal')(output)

# middle
# GCN layer
output_middle_start = layers.Dot(axes=1)([adjacency_middle, features])
output = layers.Dense(n, activation='relu')(output_middle_start)
# In[11]: print(GPUs) # In[39]: from keras import layers with tf.device(GPUs[0]): input_a = Input(shape=(INPUT_SHAPE, )) processed_a = base_network(input_a) # with tf.device(GPUs[1]): input_b = Input(shape=(INPUT_SHAPE, )) processed_b = base_network(input_b) cos_distance = layers.Dot(axes=-1, normalize=True)([processed_a, processed_b]) siamese_net = Model([input_a, input_b], cos_distance) # In[40]: siamese_net.summary() # ### 7. Compile the model. # In[41]: def contrastive_loss(y_true, y_pred): '''Contrastive loss from Hadsell-et-al.'06 http://yann.lecun.com/exdb/publis/pdf/hadsell-chopra-lecun-06.pdf '''
# Split at index 4000: the first 4000 samples train, the rest test.
y_train = labels[:4000]
y_test = labels[4000:]
adjacency_test_near = adjacencys[4000:]

print(x_train_near.shape, y_train.shape, adjacency_train_near.shape)
print(x_test_near.shape, y_test.shape, adjacency_test_near.shape)

################################################ Model: Near Block - Multi-STGCnet
from keras import Input, models, layers

features = Input(shape=(n, pre_sr))
adjacency = Input(shape=(n, n))  # adjacency matrix

#################### spatial component
# GCN layer
# Dot(axes=1) contracts axis 1 of both inputs, i.e. it multiplies the
# features by the transposed adjacency (same as A.X only for symmetric A).
output = layers.Dot(axes=1)([adjacency, features])
output = layers.Dense(n, activation='relu')(output)
# GCN layer
output = layers.Dot(axes=1)([adjacency, output])
output = layers.Dense(n, activation='relu')(output)

#################### temporal component
# LSTM
# Swap the two non-batch axes before feeding the stacked LSTMs.
output = layers.Permute((2, 1))(output)
output = layers.LSTM(32, return_sequences=True)(output)
output = layers.LSTM(12, kernel_initializer='random_normal')(output)

# output layer
output = layers.Dense(1, activation='relu',
                      kernel_initializer='random_normal')(output)

model = models.Model(inputs=[features, adjacency], outputs=[output])
def build_birnn_feature_coattention_cnn_model(voca_dim,
                                              time_steps,
                                              num_features,
                                              feature_dim,
                                              output_dim,
                                              model_dim,
                                              mlp_dim,
                                              num_filters,
                                              filter_sizes,
                                              item_embedding=None,
                                              rnn_depth=1,
                                              mlp_depth=1,
                                              drop_out=0.5,
                                              rnn_drop_out=0.,
                                              rnn_state_drop_out=0.,
                                              cnn_drop_out=0.5,
                                              pooling='max',
                                              trainable_embedding=False,
                                              gpu=False,
                                              return_customized_layers=False):
    """
    Create a bidirectional-RNN / feature co-attention model with a CNN head.

    The sequence input is encoded by stacked bidirectional LSTMs, co-attended
    with a dense-mapped feature input, and the attended sequence is reduced
    by a multi-width Conv1D + pooling stage. The attended features are
    average-pooled, concatenated with the CNN output and classified by an
    MLP with a softmax output.

    :param voca_dim: vocabulary dimension size.
    :param time_steps: the length of input
    :param num_features: number of rows of the feature input
    :param feature_dim: dimension of each feature row; also the width of the
        feature mapping layer
    :param output_dim: the output dimension size
    :param model_dim: rnn dimension size; an odd value is incremented by one
        so that each LSTM direction gets model_dim / 2 units
    :param mlp_dim: the dimension size of fully connected layer
    :param num_filters: number of filters of every Conv1D layer
    :param filter_sizes: non-empty sequence of Conv1D kernel sizes, one
        convolution channel per entry
    :param item_embedding: integer, numpy 2D array, or None (default=None)
        If item_embedding is a integer, connect a randomly initialized
        embedding matrix to the input tensor.
        If item_embedding is a matrix, this matrix will be used as the
        embedding matrix.
        If item_embedding is None, then connect input tensor to RNN layer
        directly.
    :param rnn_depth: rnn depth
    :param mlp_depth: the depth of fully connected layers; mlp_depth - 1
        selu layers precede the softmax layer
    :param drop_out: dropout rate of fully connected layers
    :param rnn_drop_out: dropout rate of rnn layers
    :param rnn_state_drop_out: dropout rate of rnn state tensor (only used
        when gpu is False)
    :param cnn_drop_out: dropout rate applied to the pooled CNN features
    :param pooling: 'max' for max pooling, anything else for average pooling
    :param trainable_embedding: boolean
    :param gpu: boolean, default=False
        If True, CuDNNLSTM is used instead of LSTM for RNN layer.
    :param return_customized_layers: boolean, default=False
        If True, return model and customized object dictionary, otherwise
        return model only
    :return: keras model taking ``[input0, input1]``
    :raises ValueError: if item_embedding has an unsupported type or
        filter_sizes is empty
    """
    if len(filter_sizes) == 0:
        raise ValueError("filter_sizes must contain at least one size")

    if model_dim % 2 == 1:
        model_dim += 1
    half_dim = int(model_dim / 2)

    if item_embedding is not None:
        inputs = models.Input(shape=(time_steps, ),
                              dtype='int32',
                              name='input0')
        x1 = inputs

        # item embedding
        if isinstance(item_embedding, np.ndarray):
            assert voca_dim == item_embedding.shape[0]
            x1 = layers.Embedding(voca_dim,
                                  item_embedding.shape[1],
                                  input_length=time_steps,
                                  weights=[
                                      item_embedding,
                                  ],
                                  trainable=trainable_embedding,
                                  mask_zero=False,
                                  name='embedding_layer0')(x1)
        elif utils.is_integer(item_embedding):
            x1 = layers.Embedding(voca_dim,
                                  item_embedding,
                                  input_length=time_steps,
                                  trainable=trainable_embedding,
                                  mask_zero=False,
                                  name='embedding_layer0')(x1)
        else:
            raise ValueError(
                "item_embedding must be either integer or numpy matrix")
    else:
        inputs = models.Input(shape=(time_steps, voca_dim),
                              dtype='float32',
                              name='input0')
        x1 = inputs

    inputs1 = models.Input(shape=(num_features, feature_dim),
                           dtype='float32',
                           name='input1')
    x2 = layers.Dense(feature_dim,
                      name="feature_map_layer",
                      activation="relu")(inputs1)

    # rnn encoding
    for i in range(rnn_depth):
        if gpu:
            x1 = layers.Bidirectional(layers.CuDNNLSTM(half_dim,
                                                       return_sequences=True),
                                      name='bi_lstm_layer' + str(i))(x1)
            x1 = layers.BatchNormalization(name='rnn_batch_norm_layer' +
                                           str(i))(x1)
            # CuDNNLSTM is built without dropout arguments, so dropout is
            # applied as a separate layer on this path.
            x1 = layers.Dropout(rnn_drop_out,
                                name="rnn_dropout_layer" + str(i))(x1)
        else:
            x1 = layers.Bidirectional(layers.LSTM(
                half_dim,
                return_sequences=True,
                dropout=rnn_drop_out,
                recurrent_dropout=rnn_state_drop_out),
                                      name='bi_lstm_layer' + str(i))(x1)
            x1 = layers.BatchNormalization(name='rnn_batch_norm_layer' +
                                           str(i))(x1)

    # attention
    attens = clayers.CoAttentionWeight(name="coattention_weights_layer")(
        [x1, x2])
    attens1 = clayers.FeatureNormalization(
        name="normalized_coattention_weights_layer1", axis=1)(attens)
    attens2 = clayers.FeatureNormalization(
        name="normalized_coattention_weights_layer2", axis=2)(attens)

    # compare
    focus1 = layers.Dot((1, 1), name="focus_layer1")([attens1, x1])
    focus2 = layers.Dot((2, 1), name="focus_layer2")([attens2, x2])

    pair1 = layers.Concatenate(axis=-1, name="pair_layer1")([x1, focus2])
    pair2 = layers.Concatenate(axis=-1, name="pair_layer2")([x2, focus1])

    x1 = layers.TimeDistributed(layers.Dense(model_dim, activation="relu"),
                                name="compare_layer1")(pair1)
    x2 = layers.TimeDistributed(layers.Dense(model_dim, activation="relu"),
                                name="compare_layer2")(pair2)

    # Multi-Channel CNN for x1
    pool_cls = (layers.MaxPooling1D
                if pooling == 'max' else layers.AveragePooling1D)
    pooled_outputs = []
    for filter_size in filter_sizes:
        conv = layers.Conv1D(num_filters,
                             kernel_size=filter_size,
                             padding='valid',
                             activation='relu')(x1)
        # With 'valid' padding the convolution output has
        # time_steps - filter_size + 1 steps, so this pool spans all of it.
        conv = pool_cls(pool_size=time_steps - filter_size + 1,
                        strides=1,
                        padding='valid')(conv)
        pooled_outputs.append(conv)

    # Concatenate rejects a single-element list, so a lone channel is used
    # directly.
    if len(pooled_outputs) > 1:
        x1 = layers.Concatenate(name='concated_layer')(pooled_outputs)
    else:
        x1 = pooled_outputs[0]
    x1 = layers.Flatten()(x1)
    x1 = layers.Dropout(cnn_drop_out, name='conv_dropout_layer')(x1)
    x1 = layers.BatchNormalization(name="batch_norm_layer")(x1)

    # Average Pool for x2
    x2 = layers.GlobalAveragePooling1D(name="average_pool_layer")(x2)

    x = layers.Concatenate(axis=1,
                           name="concat_deep_feature_layer")([x1, x2])

    # MLP Layers
    for i in range(mlp_depth - 1):
        x = layers.Dense(mlp_dim,
                         activation='selu',
                         kernel_initializer='lecun_normal',
                         name='selu_layer' + str(i))(x)
        x = layers.AlphaDropout(drop_out, name='alpha_layer' + str(i))(x)

    outputs = layers.Dense(output_dim,
                           activation="softmax",
                           name="softmax_layer0")(x)

    # Both Input tensors feed the graph, so both must be declared as model
    # inputs; listing only `inputs` leaves the graph disconnected.
    model = models.Model([inputs, inputs1], outputs)

    if return_customized_layers:
        return model, {
            'CoAttentionWeight': clayers.CoAttentionWeight,
            "FeatureNormalization": clayers.FeatureNormalization
        }

    return model
def build_model(char_size=27, dim=64, iterations=4, training=True, ilp=False, pca=False):
    """Assemble the iterative rule-attention model.

    The query and every predicate of every context rule are encoded by a
    shared backwards GRU; rules are then encoded from their predicates.
    Each iteration attends over the rules given the current state, unifies
    every rule with the state and takes the attention-weighted sum as the
    next state. The prediction is a sigmoid over the final state.

    :param char_size: size of the symbol vocabulary (one-hot width)
    :param dim: width of the GRU encoders
    :param iterations: number of attention/unification iterations
    :param training: if True (and neither ilp nor pca), return a compiled
        single-output model
    :param ilp: falsy, or a ``(context, query, templates)`` triple of
        tensors that replace the input placeholders; in that case the
        function returns ``(attentions, prediction)`` instead of a model
    :param pca: if True (and ilp is falsy), return a model that outputs the
        rule embeddings
    :return: keras model, or ``(attentions, prediction)`` when ilp is given
    """
    # Placeholders; the context is indexed as (rules, preds, chars).
    ctx_in = L.Input(shape=(None, None, None,), name='context', dtype='int32')
    query_in = L.Input(shape=(None,), name='query', dtype='int32')
    if ilp:
        # Caller-supplied tensors take the place of the placeholders.
        ctx_in, query_in, templates = ilp

    # Frozen one-hot symbol table whose index-0 row is zeroed out.
    eye = np.eye(char_size)
    eye[0, 0] = 0
    symbol_table = L.Embedding(char_size, char_size,
                               trainable=False,
                               weights=[eye],
                               name='onehot')
    ctx_symbols = symbol_table(ctx_in)  # (?, rules, preds, chars, char_size)
    query_symbols = symbol_table(query_in)  # (?, chars, char_size)
    if ilp:
        # Templates are prepended along the rule axis.
        ctx_symbols = L.Lambda(lambda xs: K.concatenate(xs, axis=1),
                               name='template_concat')([templates, ctx_symbols])

    # One encoder shared by the query and by every context predicate.
    pred_encoder = ZeroGRU(dim, go_backwards=True, name='embed_pred')
    query_vec = pred_encoder(query_symbols)  # (?, dim)
    ctx_pred_vecs = NestedTimeDist(NestedTimeDist(pred_encoder, name='nest1'),
                                   name='nest2')(ctx_symbols)  # (?, rules, preds, dim)
    rule_encoder = ZeroGRU(dim, name='embed_rule')
    rule_vecs = NestedTimeDist(rule_encoder,
                               name='d_embed_rule')(ctx_pred_vecs)  # (?, rules, dim)

    # Layers shared by all iterations.
    tile_over_rules = L.RepeatVector(K.shape(ctx_symbols)[1], name='repeat_to_ctx')
    squared_diff = L.Lambda(lambda xy: K.square(xy[0]-xy[1]),
                            output_shape=(None, dim),
                            name='diff_sq')
    product = L.Multiply()
    join_feats = L.Lambda(lambda xs: K.concatenate(xs, axis=2),
                          output_shape=(None, dim*5),
                          name='concat')
    att_hidden = L.Dense(dim//2, activation='tanh', name='att_densel')
    att_score = L.Dense(1, name='att_dense')
    drop_last = L.Lambda(lambda x: K.squeeze(x, 2), name='sequeeze2')
    normalise = L.Softmax(axis=1)
    rule_unifier = NestedTimeDist(ZeroGRU(dim, go_backwards=False, name='unifier'),
                                  name='dist_unifier')
    weighted_sum = L.Dot((1, 1))

    state = query_vec
    tiled_query = tile_over_rules(query_vec)
    attentions = []
    for _ in range(iterations):
        # Attention of the current state over the rules.
        tiled_state = tile_over_rules(state)  # (?, rules, dim)
        pair_feats = join_feats([
            squared_diff([tiled_state, rule_vecs]),
            product([rule_vecs, state]),
            tiled_state,
            rule_vecs,
            tiled_query,
        ])
        weights = normalise(drop_last(att_score(att_hidden(pair_feats))))  # (?, rules)
        attentions.append(weights)
        # Unify each rule with the state, then mix by attention.
        candidates = rule_unifier(ctx_pred_vecs, initial_state=[state])  # (?, rules, dim)
        state = weighted_sum([weights, candidates])

    prediction = L.Dense(1, activation='sigmoid', name='out')(state)

    if ilp:
        return attentions, prediction
    if pca:
        return Model([ctx_in, query_in], [rule_vecs])
    if training:
        net = Model([ctx_in, query_in], [prediction])
        net.compile(loss='binary_crossentropy',
                    optimizer='adam',
                    metrics=['acc'])
        return net
    return Model([ctx_in, query_in], attentions + [prediction])
def build_inter_coattention_cnn_model(num_feature_channels1,
                                      num_feature_channels2,
                                      num_features1,
                                      num_features2,
                                      feature_dim1,
                                      output_dim,
                                      num_filters,
                                      filter_sizes,
                                      atten_dim,
                                      model_dim,
                                      mlp_dim,
                                      mlp_depth=1,
                                      drop_out=0.5,
                                      pooling='max',
                                      padding='valid',
                                      return_customized_layers=False):
    """
    Build a co-attention classifier over two mention pairs.

    inputs:
        embeddings: [batch, num_embed_feature, embed_dims] * 3 ## pronoun, A, B
        positional_features: [batch, num_pos_feature] * 2 ## pronoun-A, pronoun-B
    outputs:
        [batch, num_classes] # in our case there should be 3 output classes: A, B, None

    Embedding channels 0/1 plus positional channel 0 form the first pair,
    embedding channels 0/2 plus positional channel 1 form the second; the
    pairs are co-attended, compared, flattened and classified by an MLP.

    :param num_feature_channels1: number of embedding inputs (channels 0-2
        are read)
    :param num_feature_channels2: number of positional inputs (channels 0-1
        are read)
    :param num_features1: rows of each embedding input
    :param num_features2: length of each positional input
    :param feature_dim1: width of each embedding row; positional inputs are
        mapped to this width
    :param output_dim: the output dimension size
    :param num_filters: not used by the active code path
    :param filter_sizes: not used by the active code path
    :param atten_dim: dimension passed to the co-attention weight layer
    :param model_dim: not used by the active code path
    :param mlp_dim: the dimension size of fully connected layer
    :param mlp_depth: the depth of fully connected layers
    :param drop_out: dropout rate of fully connected layers
    :param pooling: not used by the active code path
    :param padding: not used by the active code path
    :param return_customized_layers: boolean, default=False
        If True, return model and customized object dictionary, otherwise
        return model only
    :return: keras model
    """
    # inputs
    embed_inputs = [
        models.Input(shape=(num_features1, feature_dim1),
                     dtype='float32',
                     name='input1_' + str(channel))
        for channel in range(num_feature_channels1)
    ]
    pos_inputs = [
        models.Input(shape=(num_features2, ),
                     dtype='float32',
                     name='input2_' + str(channel))
        for channel in range(num_feature_channels2)
    ]

    # Feature mapping layers. The input dropout wrapper is instantiated but
    # never applied: embedding channels pass through unchanged.
    unused_input_dropout = layers.TimeDistributed(
        layers.Dropout(rate=drop_out, name="input_dropout_layer"))
    pos_projection = layers.Dense(feature_dim1,
                                  name="feature_map_layer2",
                                  activation="relu")

    embed_feats = list(embed_inputs)
    pos_feats = [pos_projection(tensor) for tensor in pos_inputs]

    # Positional features become one extra row of each mention pair.
    as_single_row = layers.Reshape((1, feature_dim1), name="reshape_layer")
    pos_feats = [as_single_row(tensor) for tensor in pos_feats]

    first_pair = layers.Concatenate(axis=1, name="concate_pair1_layer")(
        [embed_feats[0], embed_feats[1], pos_feats[0]])
    second_pair = layers.Concatenate(axis=1, name="concate_pair2_layer")(
        [embed_feats[0], embed_feats[2], pos_feats[1]])

    coattention = RemappedCoAttentionWeight(atten_dim,
                                            name="coattention_weights_layer")
    normalise_axis1 = FeatureNormalization(
        name="normalized_coattention_weights_layer1", axis=1)
    normalise_axis2 = FeatureNormalization(
        name="normalized_coattention_weights_layer2", axis=2)
    focus_on_first = layers.Dot((1, 1), name="focus_layer1")
    focus_on_second = layers.Dot((2, 1), name="focus_layer2")
    join_first = layers.Concatenate(axis=-1, name="pair_layer1")
    join_second = layers.Concatenate(axis=-1, name="pair_layer2")

    # attention
    weights = coattention([first_pair, second_pair])
    weights_axis1 = normalise_axis1(weights)
    weights_axis2 = normalise_axis2(weights)

    # compare: each pair is extended with the other pair's attended view
    attended_first = focus_on_first([weights_axis1, first_pair])
    attended_second = focus_on_second([weights_axis2, second_pair])
    first_pair = join_first([first_pair, attended_second])
    second_pair = join_second([second_pair, attended_first])

    hidden = layers.Concatenate(axis=1,
                                name="concate_layer")([first_pair, second_pair])
    hidden = layers.TimeDistributed(
        layers.Dropout(rate=drop_out, name="pair_dropout_layer"))(hidden)
    hidden = layers.TimeDistributed(
        layers.Dense(mlp_dim,
                     name="pair_feature_map_layer",
                     activation="relu"))(hidden)
    hidden = layers.Flatten(name="pair_feature_flatten_layer1")(hidden)

    # MLP Layers
    hidden = layers.BatchNormalization(name='batch_norm_layer')(hidden)
    hidden = layers.Dropout(rate=drop_out, name="dropout_layer")(hidden)

    for depth in range(mlp_depth - 1):
        hidden = layers.Dense(mlp_dim,
                              activation='selu',
                              kernel_initializer='lecun_normal',
                              name='selu_layer' + str(depth))(hidden)
        hidden = layers.AlphaDropout(drop_out,
                                     name='alpha_layer' + str(depth))(hidden)

    outputs = layers.Dense(output_dim,
                           activation="softmax",
                           name="softmax_layer0")(hidden)

    model = models.Model(embed_inputs + pos_inputs, outputs)

    if not return_customized_layers:
        return model

    custom_objects = {
        'RemappedCoAttentionWeight': RemappedCoAttentionWeight,
        "FeatureNormalization": FeatureNormalization
    }
    return model, custom_objects
def build_model(char_size=27,
                dim=64,
                iterations=4,
                training=True,
                ilp=False,
                pca=False):
    """Build the iterative rule-attention model on frozen GloVe embeddings.

    Tokens are embedded with 100-dimensional GloVe vectors read from
    ``./data/glove/glove.6B.100d.txt`` (relative to the current working
    directory); words without a GloVe vector keep an all-zero row. The
    query and every context predicate are encoded by a shared backwards
    GRU, and the first predicate of each rule serves as the rule embedding.
    Each iteration scores the rules against the current state with a
    masked sigmoid attention, unifies every rule with the state and takes
    the attention-weighted sum as the next state.

    ``CONTEXT_TEXTS`` and ``WORD_INDEX`` are not defined in this function
    and must be provided by the enclosing module.

    :param char_size: not read by this function body
    :param dim: width of the GRU encoders
    :param iterations: number of attention/unification iterations
    :param training: if True (and neither ilp nor pca), return a compiled
        single-output model
    :param ilp: falsy, or a ``(context, query, templates)`` triple of
        tensors that replace the input placeholders; in that case the
        function returns ``(outs, out)`` instead of a model
    :param pca: if True (and ilp is falsy), return a model that outputs the
        rule embeddings
    :return: keras model, or ``(outs, out)`` when ilp is given
    """
    print('Found %s texts.' % len(CONTEXT_TEXTS))

    word_index = WORD_INDEX
    print('Found %s unique tokens.' % len(word_index))

    # Load the GloVe vectors; the context manager closes the file even if
    # parsing fails part-way.
    embeddings_index = {}
    glove_dir = os.path.join(os.path.abspath('.'), 'data', 'glove')
    glove_path = os.path.join(glove_dir, 'glove.6B.100d.txt')
    with open(glove_path, 'r', encoding='utf-8') as glove_file:
        for line in glove_file:
            values = line.split()
            if not values:
                # Tolerate blank lines (e.g. a trailing newline).
                continue
            embeddings_index[values[0]] = np.asarray(values[1:],
                                                     dtype='float32')
    print('Found %s word vectors.' % len(embeddings_index))

    embedding_dim = 100
    # One extra row because index 0 is not assigned to any word here.
    embedding_matrix = np.zeros((len(word_index) + 1, embedding_dim))
    for word, i in word_index.items():
        embedding_vector = embeddings_index.get(word)
        if embedding_vector is not None:
            # words not found in embedding index will be all-zeros.
            embedding_matrix[i] = embedding_vector

    embedding_layer = L.Embedding(len(word_index) + 1,
                                  embedding_dim,
                                  weights=[embedding_matrix],
                                  trainable=False)

    # Inputs
    # Context: (rules, preds, chars,)
    context = L.Input(shape=(None, None, None, ),
                      name='context',
                      dtype='int32')
    query = L.Input(shape=(None, ), name='query', dtype='int32')
    if ilp:
        # Caller-supplied tensors must replace the placeholders (not the
        # other way round), otherwise the returned tensors would depend on
        # inputs the caller has no handle on.
        context, query, templates = ilp

    embedded_ctx = embedding_layer(
        context)  # (?, rules, preds, chars, embedding_dim)
    embedded_q = embedding_layer(query)  # (?, chars, embedding_dim)

    if ilp:
        # Combine the templates with the context along the rule axis,
        # (?, rules+temps, preds, chars, embedding_dim)
        embedded_ctx = L.Lambda(lambda xs: K.concatenate(xs, axis=1),
                                name='template_concat')(
                                    [templates, embedded_ctx])

    embed_pred = ZeroGRU(dim, go_backwards=True, name='embed_pred')
    embedded_predq = embed_pred(embedded_q)  # (?, dim)
    # For every rule, for every predicate, embed the predicate
    embedded_ctx_preds = L.TimeDistributed(
        L.TimeDistributed(embed_pred, name='nest1'),
        name='nest2')(embedded_ctx)  # (?, rules, preds, dim)

    # The first predicate of each rule stands in for the whole rule.
    get_heads = L.Lambda(lambda x: x[:, :, 0, :], name='rule_heads')
    embedded_rules = get_heads(embedded_ctx_preds)  # (?, rules, dim)

    # Reused layers over iterations
    repeat_toctx = L.RepeatVector(K.shape(embedded_ctx)[1],
                                  name='repeat_to_ctx')
    diff_sq = L.Lambda(lambda xy: K.square(xy[0] - xy[1]),
                       output_shape=(None, dim),
                       name='diff_sq')
    mult = L.Multiply()
    concat = L.Lambda(lambda xs: K.concatenate(xs, axis=2),
                      output_shape=(None, dim * 5),
                      name='concat')
    att_densel = L.Dense(dim // 2, activation='tanh', name='att_densel')
    att_dense = L.Dense(1, activation='sigmoid', name='att_dense')
    squeeze2 = L.Lambda(lambda x: K.squeeze(x, 2), name='sequeeze2')
    # 1.0 for rules with any non-zero embedding entry, 0.0 otherwise.
    rule_mask = L.Lambda(lambda x: K.cast(
        K.any(K.not_equal(x, 0), axis=-1, keepdims=True), 'float32'),
                         name='rule_mask')(embedded_rules)
    unifier = NestedTimeDist(ZeroGRU(dim, name='unifier'),
                             name='dist_unifier')
    dot11 = L.Dot((1, 1))

    # Reasoning iterations
    state = embedded_predq
    repeated_q = repeat_toctx(embedded_predq)
    outs = []
    for _ in range(iterations):
        # Compute attention between rule and query state
        ctx_state = repeat_toctx(state)  # (?, rules, dim)
        s_s_c = diff_sq([ctx_state, embedded_rules])
        s_m_c = mult([embedded_rules, state])  # (?, rules, dim)
        sim_vec = concat(
            [s_s_c, s_m_c, ctx_state, embedded_rules, repeated_q])
        sim_vec = att_densel(sim_vec)  # (?, rules, dim//2)
        sim_vec = att_dense(sim_vec)  # (?, rules, 1)
        # Zero the attention of all-zero rules.
        sim_vec = mult([sim_vec, rule_mask])
        sim_vec = squeeze2(sim_vec)  # (?, rules)
        outs.append(sim_vec)

        # Unify every rule and weighted sum based on attention
        new_states = unifier(embedded_ctx_preds,
                             initial_state=[state])  # (?, rules, dim)
        state = dot11([sim_vec, new_states])

    # Prediction
    out = L.Dense(1, activation='sigmoid', name='out')(state)
    if ilp:
        return outs, out
    elif pca:
        model = Model([context, query], [embedded_rules])
    elif training:
        model = Model([context, query], [out])
        model.compile(loss='binary_crossentropy',
                      optimizer='adam',
                      metrics=['acc'])
    else:
        model = Model([context, query], outs + [out])
    return model
def build_birnn_attention_model(voca_dim, time_steps, output_dim, rnn_dim, mlp_dim,
                                item_embedding=None, rnn_depth=1, mlp_depth=1,
                                num_att_channel=1, drop_out=0.5, rnn_drop_out=0.,
                                rnn_state_drop_out=0., trainable_embedding=False,
                                gpu=False, return_customized_layers=False):
    """
    Create a bidirectional LSTM model with attention.

    The network is: optional embedding -> ``rnn_depth`` bidirectional LSTM
    layers (each followed by batch normalization) -> one or more attention
    heads -> batch normalization -> ``mlp_depth - 1`` SELU dense layers with
    alpha dropout -> softmax output layer.

    :param voca_dim: vocabulary dimension size.
    :param time_steps: the length of input
    :param output_dim: the output dimension size
    :param rnn_dim: rnn dimension size
    :param mlp_dim: the dimension size of fully connected layer
    :param item_embedding: integer, numpy 2D array, or None (default=None)
        If item_embedding is an integer, connect a randomly initialized
        embedding matrix of that width to an int32 input of shape
        ``(time_steps,)``.
        If item_embedding is a matrix, this matrix will be used as the
        embedding matrix; its first dimension must equal ``voca_dim``.
        If item_embedding is None, then connect a float32 input tensor of
        shape ``(time_steps, voca_dim)`` to the RNN layer directly.
    :param rnn_depth: rnn depth
    :param mlp_depth: the depth of fully connected layers, counting the final
        softmax layer (``mlp_depth - 1`` hidden layers are created)
    :param num_att_channel: the number of attention channels, this can be used
        to mimic multi-head attention mechanism; values below 1 are treated
        as a single channel
    :param drop_out: dropout rate of fully connected layers
    :param rnn_drop_out: dropout rate of rnn layers
    :param rnn_state_drop_out: dropout rate of rnn state tensor (not used
        when ``gpu`` is True)
    :param trainable_embedding: boolean, whether the embedding layer is trainable
    :param gpu: boolean, default=False
        If True, CuDNNLSTM is used instead of LSTM for RNN layer. In that
        mode ``rnn_drop_out`` is applied through a separate Dropout layer
        placed after each batch normalization.
    :param return_customized_layers: boolean, default=False
        If True, return model and customized object dictionary, otherwise
        return model only
    :return: keras model, or ``(model, custom_objects)`` when
        ``return_customized_layers`` is True
    :raises ValueError: if ``item_embedding`` is neither None, an integer nor
        a 2D numpy array, or if the matrix row count differs from ``voca_dim``
    """
    # Validate the embedding specification before any layer is created, so
    # bad input fails fast with a clear error (and also under ``python -O``,
    # where a plain ``assert`` would be stripped).
    embedding_dim = None
    embedding_kwargs = {}
    if item_embedding is not None:
        if isinstance(item_embedding, np.ndarray):
            if item_embedding.ndim != 2:
                raise ValueError(
                    "item_embedding matrix must be 2D, got {} dimension(s)".format(
                        item_embedding.ndim))
            if item_embedding.shape[0] != voca_dim:
                raise ValueError(
                    "voca_dim ({}) must equal item_embedding.shape[0] ({})".format(
                        voca_dim, item_embedding.shape[0]))
            embedding_dim = item_embedding.shape[1]
            # Pre-trained matrix is handed to the layer as its initial weights.
            embedding_kwargs['weights'] = [item_embedding]
        elif utils.is_integer(item_embedding):
            embedding_dim = item_embedding
        else:
            raise ValueError(
                "item_embedding must be either integer or numpy matrix")

    if item_embedding is not None:
        # Token ids in, embedded vectors out.
        inputs = models.Input(shape=(time_steps, ), dtype='int32', name='input0')
        x = layers.Embedding(voca_dim, embedding_dim,
                             input_length=time_steps,
                             trainable=trainable_embedding,
                             mask_zero=False,
                             name='embedding_layer0',
                             **embedding_kwargs)(inputs)
    else:
        # No embedding: the input already carries voca_dim features per step.
        inputs = models.Input(shape=(time_steps, voca_dim), dtype='float32', name='input0')
        x = inputs

    # rnn encoding
    for i in range(rnn_depth):
        if gpu:
            # CuDNNLSTM is only looked up when requested; it is built here
            # without dropout arguments, so dropout is added as its own
            # layer further down.
            rnn = layers.CuDNNLSTM(rnn_dim, return_sequences=True)
        else:
            rnn = layers.LSTM(rnn_dim,
                              return_sequences=True,
                              dropout=rnn_drop_out,
                              recurrent_dropout=rnn_state_drop_out)
        x = layers.Bidirectional(rnn, name='bi_lstm_layer' + str(i))(x)
        x = layers.BatchNormalization(name='rnn_batch_norm_layer' + str(i))(x)
        if gpu:
            x = layers.Dropout(rnn_drop_out, name="rnn_dropout_layer" + str(i))(x)

    # attention: every head computes its own weights over x and contracts
    # them against the permuted sequence tensor.
    attention_heads = []
    x_per = layers.Permute((2, 1), name='permuted_attention_x')(x)
    for h in range(max(1, num_att_channel)):
        attention = clayers.AttentionWeight(name="attention_weights_layer" + str(h))(x)
        xx = layers.Dot([2, 1], name='focus_head' + str(h) + '_layer0')([x_per, attention])
        attention_heads.append(xx)

    if num_att_channel > 1:
        x = layers.Concatenate(name='focus_layer0')(attention_heads)
    else:
        x = attention_heads[0]

    x = layers.BatchNormalization(name='focused_batch_norm_layer')(x)

    # MLP Layers (the softmax layer below counts as the last MLP layer)
    for i in range(mlp_depth - 1):
        x = layers.Dense(mlp_dim,
                         activation='selu',
                         kernel_initializer='lecun_normal',
                         name='selu_layer' + str(i))(x)
        x = layers.AlphaDropout(drop_out, name='alpha_layer' + str(i))(x)

    outputs = layers.Dense(output_dim, activation="softmax", name="softmax_layer0")(x)

    model = models.Model(inputs, outputs)

    if return_customized_layers:
        return model, {'AttentionWeight': clayers.AttentionWeight}

    return model