def test_merge_dot():
    """Check ``layers.dot`` and ``layers.Dot`` on a pair of 4-feature inputs.

    The merged tensor must have static shape ``(None, 1)`` and the model
    prediction must equal the per-sample dot product of the two inputs, both
    for ``axes=1`` and for the equivalent negative tuple ``axes=(-1, -1)``.
    """
    first_in = layers.Input(shape=(4,))
    second_in = layers.Input(shape=(4,))

    # Functional interface, contracting over axis 1.
    merged = layers.dot([first_in, second_in], axes=1)
    assert merged._keras_shape == (None, 1)
    net = models.Model([first_in, second_in], merged)

    # Layer-object interface: the layer is called once so that its
    # ``output_shape`` can be queried.
    layer = layers.Dot(axes=1)
    layer([first_in, second_in])
    assert layer.output_shape == (None, 1)

    batch_a = np.random.random((2, 4))
    batch_b = np.random.random((2, 4))
    predicted = net.predict([batch_a, batch_b])
    assert predicted.shape == (2, 1)

    # Reference values: one dot product per sample row.
    reference = np.zeros((2, 1))
    for row in range(2):
        reference[row, 0] = np.dot(batch_a[row], batch_b[row])
    assert_allclose(predicted, reference, atol=1e-4)

    # Test with negative tuple of axes.
    merged = layers.dot([first_in, second_in], axes=(-1, -1))
    assert merged._keras_shape == (None, 1)
    net = models.Model([first_in, second_in], merged)
    predicted = net.predict([batch_a, batch_b])
    assert predicted.shape == (2, 1)
    assert_allclose(predicted, reference, atol=1e-4)
def hop(query, story):
    """Run one attention hop of the query over the story.

    The query is dotted with every story sentence, the scores are passed
    through a softmax, and the resulting weights pool a new embedding of the
    story that is finally fed through ``dense_layer``.

    Shapes as stated by the original author (batch axis not counted):
    ``query`` is ``(embedding_dim,)`` and ``story`` is
    ``(num sentences, embedding_dim)``.

    Relies on ``embedding_dim``, ``story_maxsents``, ``embed_and_sum``,
    ``input_story_`` and ``dense_layer`` from the enclosing scope.

    Returns:
        Tuple ``(output, story_embedding2, story_weights)``: the dense-layer
        output, the new story embedding, and the attention weights reshaped
        to ``(story_maxsents, 1)``.
    """
    # Give the query a length-1 leading axis: (1, embedding_dim).
    query_row = Reshape((1, embedding_dim))(query)
    scores = dot([story, query_row], 2)

    # Flatten to one score per sentence for the softmax ...
    flat_scores = Reshape((story_maxsents,))(scores)
    attention = Activation('softmax')(flat_scores)
    # ... then restore the trailing axis so the weights can be dotted again.
    story_weights = Reshape((story_maxsents, 1))(attention)

    # Build a new embedding of the story input and pool it with the weights.
    story_embedding2 = embed_and_sum(input_story_)
    pooled = dot([story_weights, story_embedding2], 1)
    pooled = Reshape((embedding_dim,))(pooled)
    return dense_layer(pooled), story_embedding2, story_weights
def build(self):
    """Assemble the memory-network graph, store it on ``self.model`` and return it.

    Reads ``self.input_sequence``, ``self.question``, the three encoders
    (``self.encoders_m``, ``self.encoders_c``, ``self.encoders_question``)
    and ``self.vocab_size``. The returned model is not compiled here.
    """
    story_in = self.input_sequence
    question_in = self.question

    story_m = self.encoders_m(story_in)
    story_c = self.encoders_c(story_in)
    question_enc = self.encoders_question(question_in)

    # Match every story position against every question position and
    # normalise the scores with a softmax.
    attention = Activation('softmax')(
        dot([story_m, question_enc], axes=(2, 2)))

    # Add the match matrix to the second story encoding:
    # (samples, story_maxlen, query_maxlen)
    mixed = add([attention, story_c])
    # Swap the last two axes: (samples, query_maxlen, story_maxlen)
    mixed = Permute((2, 1))(mixed)

    # Join the response with the question encoding.
    joined = concatenate([mixed, question_enc])

    # The original paper reduces with a matrix multiplication; an RNN is
    # used here instead.  Output: (samples, 32)
    reduced = LSTM(32)(joined)
    # One regularization layer -- more would probably be needed.
    reduced = Dropout(0.3)(reduced)

    # Project to the vocabulary, (samples, vocab_size), and turn the result
    # into a probability distribution.
    logits = Dense(self.vocab_size)(reduced)
    probabilities = Activation('softmax')(logits)

    self.model = Model([story_in, question_in], probabilities)
    return self.model
def get_model(self):
    """Build the (uncompiled) dot-product factorization model.

    An item id and a user id are each looked up in a non-negative embedding
    table, flattened to a vector, and combined with a dot product.

    Reads ``self.num_tweets`` / ``self.n_latent_factors_item`` for the item
    embedding and ``self.num_users`` / ``self.n_latent_factors_user`` for
    the user embedding. The dot product is taken over axis 1 of both
    flattened vectors, so the two latent sizes must be equal.

    Returns:
        A ``Model`` whose inputs are ``[user_input, item_input]`` (user
        first) and whose output is the ``'DotProduct'`` layer.
    """
    item_input = Input(shape=[1], name='Item')
    item_embedding = Embedding(self.num_tweets,
                               self.n_latent_factors_item,
                               name='Item-Embedding',
                               embeddings_constraint=non_neg())(item_input)
    item_vec = Flatten(name='FlattenItem')(item_embedding)

    user_input = Input(shape=[1], name='User')
    user_embedding = Embedding(self.num_users,
                               self.n_latent_factors_user,
                               name='User-Embedding',
                               embeddings_constraint=non_neg())(user_input)
    user_vec = Flatten(name='FlattenUsers')(user_embedding)

    prod = dot([item_vec, user_vec], axes=1, name='DotProduct')

    # ``inputs=`` / ``outputs=`` are the Keras 2 keyword names; the singular
    # ``input=`` / ``output=`` spellings are deprecated Keras 1 aliases.
    return Model(inputs=[user_input, item_input], outputs=prod)
def eltwise(layer, layer_in, layerId):
    """Merge ``layer_in`` with the element-wise op named in ``layer``.

    ``layer['params']['layer_type']`` selects the merge: ``'Multiply'``,
    ``'Sum'``, ``'Average'`` or ``'Dot'`` (dot over the last axis). Any
    other value falls back to ``maximum``.

    Returns:
        A one-entry dict mapping ``layerId`` to the merged tensor.
    """
    merge_kind = layer['params']['layer_type']
    # The inputs are reversed to handle visualization.
    reversed_inputs = layer_in[::-1]

    if merge_kind == 'Multiply':
        merged = multiply(reversed_inputs)
    elif merge_kind == 'Sum':
        merged = add(reversed_inputs)
    elif merge_kind == 'Average':
        merged = average(reversed_inputs)
    elif merge_kind == 'Dot':
        merged = dot(reversed_inputs, -1)
    else:
        merged = maximum(reversed_inputs)

    return {layerId: merged}
# Finish the question encoder: a 64-dimensional embedding followed by dropout.
question_encoder.add(Embedding(input_dim=vocab_size,
                               output_dim=64,
                               input_length=query_maxlen))
question_encoder.add(Dropout(0.3))
# output: (samples, query_maxlen, embedding_dim)

# encode input sequence and questions (which are indices)
# to sequences of dense vectors
input_encoded_m = input_encoder_m(input_sequence)
input_encoded_c = input_encoder_c(input_sequence)
question_encoded = question_encoder(question)

# compute a 'match' between the first input vector sequence
# and the question vector sequence
# shape: `(samples, story_maxlen, query_maxlen)`
match = dot([input_encoded_m, question_encoded], axes=(2, 2))
match = Activation('softmax')(match)

# add the match matrix with the second input vector sequence
response = add([match, input_encoded_c])  # (samples, story_maxlen, query_maxlen)
response = Permute((2, 1))(response)  # (samples, query_maxlen, story_maxlen)

# concatenate the match matrix with the question vector sequence
answer = concatenate([response, question_encoded])

# the original paper uses a matrix multiplication for this reduction step.
# we choose to use a RNN instead.
answer = LSTM(32)(answer)  # (samples, 32)

# one regularization layer -- more would probably be needed.
answer = Dropout(0.3)(answer)
# turn the question into an embedding # also a bag of words input_question_ = Input((query_maxlen,)) embedded_question = Embedding(vocab_size, embedding_dim)(input_question_) embedded_question = Lambda(lambda x: K.sum(x, axis=1))(embedded_question) # add a "sequence length" of 1 so that it can # be dotted with the story later embedded_question = Reshape((1, embedding_dim))(embedded_question) print("inp_q.shape, emb_q.shape:", input_question_.shape, embedded_question.shape) # calculate the weights for each story line # embedded_story.shape = (N, num sentences, embedding_dim) # embedded_question.shape = (N, 1, embedding_dim) x = dot([embedded_story, embedded_question], 2) x = Reshape((story_maxsents,))(x) # flatten the vector x = Activation('softmax')(x) story_weights = Reshape((story_maxsents, 1))(x) # unflatten it again to be dotted later print("story_weights.shape:", story_weights.shape) x = dot([story_weights, embedded_story], 1) x = Reshape((embedding_dim,))(x) # flatten it again ans = Dense(vocab_size, activation='softmax')(x) # make the model model = Model([input_story_, input_question_], ans) # compile the model
# turn the question into an embedding # also a bag of words input_question_ = Input((query_maxlen, )) embedded_question = Embedding(vocab_size, embedding_dim)(input_question_) embedded_question = Lambda(lambda x: K.sum(x, axis=1))(embedded_question) # add a "sequence length" of 1 so that it can # be dotted with the story later embedded_question = Reshape((1, embedding_dim))(embedded_question) print("inp_q.shape, emb_q.shape:", input_question_.shape, embedded_question.shape) # calculate the weights for each story line # embedded_story.shape = (N, num sentences, embedding_dim) # embedded_question.shape = (N, 1, embedding_dim) x = dot([embedded_story, embedded_question], 2) x = Reshape((story_maxsents, ))(x) # flatten the vector x = Activation('softmax')(x) story_weights = Reshape( (story_maxsents, 1))(x) # unflatten it again to be dotted later print("story_weights.shape:", story_weights.shape) x = dot([story_weights, embedded_story], 1) x = Reshape((embedding_dim, ))(x) # flatten it again ans = Dense(vocab_size, activation='softmax')(x) # make the model model = Model([input_story_, input_question_], ans) # compile the model model.compile(optimizer=RMSprop(lr=1e-2),
def _setup(self, config):
    """Load the bAbI data and build and compile the memory network.

    Sets ``num_threads``, ``batch_size``, ``num_batches_per_step``,
    ``val_batches_per_step``, ``training_dataset``, ``testing_dataset`` and
    ``model`` on the instance.

    Keys read from ``config``: ``"embedding"``, ``"dropout"``,
    ``"lstm_size"``, ``"opt"`` and, optionally, ``"num_batches_per_step"``
    (default 1). ``"batch_size"`` is read from ``self.config``.
    """
    # Assign number of threads by looking at resources
    print("Got {} atoms.".format(self.atoms))
    self.num_threads = self.atoms
    set_keras_threads(self.num_threads)

    babi = get_and_parse_babi_data(config)
    self.batch_size = self.config["batch_size"]
    self.num_batches_per_step = config.get("num_batches_per_step", 1)
    # Validation runs a quarter of the training batches, but at least one.
    self.val_batches_per_step = max(int(self.num_batches_per_step / 4), 1)

    # First three entries are the training arrays, the next three the test
    # arrays, and the remainder the size constants.
    self.training_dataset = babi[:3]
    self.testing_dataset = babi[3:6]
    vocab_size, story_maxlen, query_maxlen = babi[6:]

    story_in = Input((story_maxlen, ))
    question_in = Input((query_maxlen, ))

    # --- encoders ---
    # Story -> sequence of embedding vectors.
    # output: (samples, story_maxlen, embedding_dim)
    story_encoder_m = Sequential()
    story_encoder_m.add(
        Embedding(input_dim=vocab_size, output_dim=int(config["embedding"])))
    story_encoder_m.add(Dropout(config["dropout"]))

    # Story -> sequence of vectors of size query_maxlen.
    # output: (samples, story_maxlen, query_maxlen)
    story_encoder_c = Sequential()
    story_encoder_c.add(
        Embedding(input_dim=vocab_size, output_dim=query_maxlen))
    story_encoder_c.add(Dropout(config["dropout"]))

    # Question -> sequence of embedding vectors.
    # output: (samples, query_maxlen, embedding_dim)
    query_encoder = Sequential()
    query_encoder.add(
        Embedding(input_dim=vocab_size,
                  output_dim=int(config["embedding"]),
                  input_length=query_maxlen))
    query_encoder.add(Dropout(config["dropout"]))

    # Turn the index sequences into sequences of dense vectors.
    story_m = story_encoder_m(story_in)
    story_c = story_encoder_c(story_in)
    query_enc = query_encoder(question_in)

    # 'Match' between the first story encoding and the question encoding,
    # shape `(samples, story_maxlen, query_maxlen)`.
    attention = dot([story_m, query_enc], axes=(2, 2))
    attention = Activation("softmax")(attention)

    # Add the match matrix to the second story encoding.
    mixed = add([attention, story_c])  # (samples, story_maxlen, query_maxlen)
    mixed = Permute((2, 1))(mixed)  # (samples, query_maxlen, story_maxlen)

    # Concatenate the response with the question encoding.
    joined = concatenate([mixed, query_enc])

    # The original paper uses a matrix multiplication for this reduction
    # step; an RNN is used here instead.  Output: (samples, lstm_size)
    reduced = LSTM(int(config["lstm_size"]))(joined)

    # One regularization layer -- more would probably be needed.
    reduced = Dropout(config["dropout"])(reduced)
    logits = Dense(vocab_size)(reduced)  # (samples, vocab_size)
    # Probability distribution over the vocabulary.
    probabilities = Activation("softmax")(logits)

    network = Model([story_in, question_in], probabilities)
    network.compile(
        optimizer=config["opt"],
        loss="sparse_categorical_crossentropy",
        metrics=["accuracy"],
    )
    self.model = network
def pointnet_cls(number_of_points, number_of_classes):
    """
    Classification PointNet.

    Builds and returns an uncompiled Keras model for multi-class
    classification over ``number_of_classes`` classes.

    # Parameters:
        - number_of_points: integer
            Number of points in pointcloud uniformly sampled from each object.
        - number_of_classes: integer
            Number of object classes.

    Model inputs: pointcloud, shape (B, number_of_points, 3, 1).
    Model outputs: class predictions, shape (B, number_of_classes).
    """

    def conv_block(tensor, filters, kernel, tag):
        # Conv2D -> BatchNormalization -> ReLU, named tag / tag_bn / tag_relu.
        tensor = Conv2D(filters, kernel_size=kernel, strides=(1, 1),
                        padding='valid', activation=None, name=tag)(tensor)
        tensor = BatchNormalization(name=tag + "_bn")(tensor)
        return Activation("relu", name=tag + "_relu")(tensor)

    def dense_block(tensor, units, tag, drop_tag):
        # Dense -> BatchNormalization -> ReLU -> Dropout(0.3).
        tensor = Dense(units, activation=None, name=tag)(tensor)
        tensor = BatchNormalization(name=tag + "_bn")(tensor)
        tensor = Activation("relu", name=tag + "_relu")(tensor)
        return Dropout(0.3, name=drop_tag)(tensor)

    inputs = Input((number_of_points, 3, 1), name="point_cloud_input")

    # Input transform: multiply the (points, 3) cloud by the t_net output.
    input_transform = t_net(inputs, part="input")
    flat_cloud = Reshape((number_of_points, 3))(inputs)
    aligned_cloud = dot([flat_cloud, input_transform], axes=[2, 1])
    features = Reshape((number_of_points, 3, 1),
                       name="inp_reshape")(aligned_cloud)

    # The first convolution spans all three coordinates of a point.
    features = conv_block(features, 64, (1, 3), "conv1")
    features = conv_block(features, 64, (1, 1), "conv2")

    # Feature transform: same pattern on the 64 per-point features.
    feature_transform = t_net(features, part="feature")
    flat_features = Reshape((number_of_points, 64))(features)
    aligned_features = dot([flat_features, feature_transform], axes=[2, 1])
    features = Reshape((number_of_points, 1, 64),
                       name="feat_reshape")(aligned_features)

    features = conv_block(features, 64, (1, 1), "conv3")
    features = conv_block(features, 128, (1, 1), "conv4")
    features = conv_block(features, 1024, (1, 1), "conv5")

    # Pool over the whole point axis to get one 1024-d global feature.
    pooled = MaxPooling2D(pool_size=(number_of_points, 1), strides=(2, 2),
                          padding='valid', name="maxpool")(features)
    pooled = Reshape((1024,), name="maxpool_reshape")(pooled)

    # Classification head.
    head = dense_block(pooled, 512, "fc1", "dp1")
    head = dense_block(head, 256, "fc2", "dp2")
    outputs = Dense(number_of_classes, activation="softmax",
                    name="fc3")(head)

    return Model(inputs=inputs, outputs=outputs)
def __init__(self, vocab_size=22, story_maxlen=68, query_maxlen=4,
             n_lstm=32, bidirect=True, tdd=True, batch_size=None,
             matchconv=False, permute=False):
    """
    DeepMemNet

    Builds and compiles the network and stores it on ``self.model``.

    Param note - changing parameters will require a new model file; this
    isn't automatic yet.

    :param vocab_size: vocabulary size; used for the embeddings and for the
        width of the output layer
    :param story_maxlen: length of the story input
    :param query_maxlen: length of the question input
    :param n_lstm: units of the LSTMs and of the time-distributed dense layer
    :param bidirect: if true, apply the extra sequence-returning
        bidirectional LSTM before the final one
    :param tdd: if true, apply a time-distributed dense layer before the RNNs
    :param batch_size: accepted but not used in this constructor
    :param matchconv: if true, run a Conv1D over the softmaxed match matrix
    :param permute: if true, swap the last two axes of the concatenated answer
    """
    # todo: config file for model hyperparams with logging link
    super().__init__(vocab_size=vocab_size,
                     story_maxlen=story_maxlen,
                     query_maxlen=query_maxlen)

    # placeholders
    story_in = Input((story_maxlen, ), name='InputSeq')
    question_in = Input((query_maxlen, ), name='Question')

    # Encoders - the initial encoders are pretty much just projecting the
    # input into a useful space; not much need to optimize here.
    # output: (samples, story_maxlen, embedding_dim)
    story_encoder_m = Sequential(name='InputEncoderM')
    story_encoder_m.add(
        Embedding(input_dim=vocab_size, output_dim=64, name='InEncM_Embed'))
    story_encoder_m.add(Dropout(0.3))

    # Story embedded into vectors of size query_maxlen.
    # output: (samples, story_maxlen, query_maxlen)
    story_encoder_c = Sequential(name='InputEncoderC')
    story_encoder_c.add(
        Embedding(input_dim=vocab_size,
                  output_dim=query_maxlen,
                  name='InEncC_Embed'))
    story_encoder_c.add(Dropout(0.3))

    # Question embedded into a sequence of vectors.
    # output: (samples, query_maxlen, embedding_dim)
    query_encoder = Sequential(name='QuestionEncoder')
    query_encoder.add(
        Embedding(input_dim=vocab_size,
                  output_dim=64,
                  input_length=query_maxlen,
                  name='QuesEnc_Embed'))
    query_encoder.add(Dropout(0.3))

    # Index sequences -> sequences of dense vectors.
    story_m = story_encoder_m(story_in)
    story_c = story_encoder_c(story_in)
    query_enc = query_encoder(question_in)

    # 'Match' between the first story encoding and the question encoding,
    # shape `(samples, story_maxlen, query_maxlen)`.
    attention = dot([story_m, query_enc], axes=(2, 2), name='Match')
    attention = Activation('softmax')(attention)
    if matchconv:
        attention = Conv1D(query_maxlen, 4, padding='same')(attention)

    # Add the match matrix to the second story encoding.
    # (samples, story_maxlen, query_maxlen)
    mixed = add([attention, story_c], name='ResponseAdd')
    # (samples, query_maxlen, story_maxlen)
    mixed = Permute((2, 1), name='ResponsePermute')(mixed)

    # Concatenate the response with the question encoding.
    sequence = concatenate([mixed, query_enc], name='AnswerConcat')

    # Trying to feed in the long axis as the timestep causes the GPU to get
    # very angry; it appears to start thrashing memory.
    if permute:
        sequence = Permute((2, 1), name='AnswerPermute')(sequence)

    # Optional time-distributed dense before the RNN.
    if tdd:
        sequence = TimeDistributed(Dense(n_lstm, name='Answer_TDD'))(sequence)

    # Bidirectional LSTMs for better context recognition. Both wrappers are
    # always constructed; the sequence-returning one is only applied when
    # `bidirect` is set.
    seq_lstm = Bidirectional(
        LSTM(n_lstm, return_sequences=True, name='Ans_LSTM_reverse'))
    final_lstm = Bidirectional(
        LSTM(n_lstm, return_sequences=False, name='Ans_LSTM_forward'))
    if bidirect:
        sequence = seq_lstm(sequence)  # "reverse" pass goes first
    summary = final_lstm(sequence)
    # An extra plain LSTM here ran out of steam at 55% acc; bidirectional
    # seems to help.

    # One regularization layer -- more would probably be needed.
    summary = Dropout(0.3, name='Answer_Drop')(summary)
    # (samples, vocab_size)
    logits = Dense(vocab_size, name='Answer_Dense')(summary)
    # Probability distribution over the vocabulary.
    probabilities = Activation('softmax')(logits)

    # Build and compile the final model.
    network = Model([story_in, question_in], probabilities)
    network.compile(optimizer='rmsprop',
                    loss='categorical_crossentropy',
                    metrics=['accuracy'])
    self.model = network
#build model codes_input_matrix = layers.Input(shape=(Y2_train.shape[1], 3), dtype='float32', name="codes_matrix") inp = layers.Input(shape=(image_size, image_size, 3)) #x = denseNet(inp) x = efficientNet(inp) auxiliary_output = layers.Dense(Y2_train.shape[1], activation='softmax', name="auxiliary_output")(x) cube_prob = layers.Lambda(cube)(auxiliary_output) main_output = layers.dot([cube_prob, codes_input_matrix], axes=1, name='main_output') model = models.Model(inputs=[inp, codes_input_matrix], outputs=[main_output, auxiliary_output]) ################################ #model = models.load_model(_MODEL_FINAL_NAME) #model.load_weights(_MODEL_WEIGHTS_FINAL_NAME) checkpoint = ModelCheckpoint(_MODEL_WEIGHTS_FINAL_NAME, monitor='loss', verbose=1, save_best_only=True, save_weights_only=True)
def pointnet_seg(number_of_points, number_of_parts):
    """
    Modified Part Segmentation PointNet.

    Builds and returns an uncompiled Keras model that predicts a part label
    distribution for every point.

    # Parameters:
        - number_of_points: integer
            Number of points in pointcloud uniformly sampled from each object.
        - number_of_parts: integer
            Number of segmented object parts.

    Model inputs: pointcloud, shape (B, number_of_points, 3, 1),
    Model outputs: segmentation predictions, shape
    (B, number_of_points, number_of_parts)
    """

    def conv_block(tensor, filters, kernel, tag):
        # Conv2D -> BatchNormalization -> ReLU, named tag / tag_bn / tag_relu.
        tensor = Conv2D(filters, kernel_size=kernel, strides=(1, 1),
                        padding='valid', activation=None, name=tag)(tensor)
        tensor = BatchNormalization(name=tag + "_bn")(tensor)
        return Activation("relu", name=tag + "_relu")(tensor)

    inputs = Input((number_of_points, 3, 1), name="point_cloud_input")

    # Input transform: multiply the (points, 3) cloud by the t_net output.
    input_transform = t_net(inputs, part="input")
    flat_cloud = Reshape((number_of_points, 3),
                         name="point_cloud_dropdim")(inputs)
    aligned_cloud = dot([flat_cloud, input_transform], axes=[2, 1])
    image = Reshape((number_of_points, 3, 1),
                    name="point_cloud_transf_adddim")(aligned_cloud)

    # Per-point features; every stage output is kept for the skip concat.
    out1 = conv_block(image, 64, (1, 3), "conv1")
    out2 = conv_block(out1, 128, (1, 1), "conv2")
    out3 = conv_block(out2, 128, (1, 1), "conv3")

    # Feature transform on the 128 per-point features.
    feature_transform = t_net(out3, part="feature")
    flat_features = Reshape((number_of_points, 128),
                            name="feat_dropdim")(out3)
    aligned_features = dot([flat_features, feature_transform], axes=[2, 1])
    out_feat = Reshape((number_of_points, 1, 128),
                       name="feat_transf_adddim")(aligned_features)

    out4 = conv_block(out_feat, 512, (1, 1), "conv4")
    deep = conv_block(out4, 2048, (1, 1), "conv5")

    # Global feature: max over the whole point axis.
    out_max = MaxPooling2D(pool_size=(number_of_points, 1), strides=(2, 2),
                           padding='valid', name="maxpool")(deep)

    # SEGMENTATION PART
    # Repeat the global feature for every point so it can be concatenated
    # with the per-point features.
    expand = Lambda(lambda x: K.tile(x, [1, number_of_points, 1, 1]))(out_max)

    # concatenation without onehot encoded labels
    stacked = concatenate([out1, out2, out3, out_feat, out4, expand], axis=-1)

    seg = conv_block(stacked, 256, (1, 1), "seg_conv1")
    seg = Dropout(0.2, name="seg_dp1")(seg)
    seg = conv_block(seg, 256, (1, 1), "seg_conv2")
    seg = Dropout(0.2, name="seg_dp2")(seg)
    seg = conv_block(seg, 128, (1, 1), "seg_conv3")

    # Per-point softmax over the parts.
    seg = Conv2D(number_of_parts, kernel_size=(1, 1), strides=(1, 1),
                 padding='valid', activation="softmax",
                 name="seg_conv4")(seg)
    seg_output = Reshape((number_of_points, number_of_parts),
                         name="seg_output")(seg)

    return Model(inputs=[inputs], outputs=[seg_output])
def non_local_block(ip, computation_compression=2, mode='embedded'):
    """Append a non-local block with a residual connection to ``ip``.

    A pairwise relation ``f`` between all positions of ``ip`` is computed
    according to ``mode``, applied to a projected copy ``g`` of the input,
    and the result is projected back to the input channel count and added
    to ``ip``.

    Args:
        ip: input tensor of rank 3 (temporal), 4 (spatial) or
            5 (spatio-temporal). The channel axis is chosen from
            ``K.image_data_format()``.
        computation_compression: pool size of the ``MaxPool1D`` applied to
            the ``phi`` and ``g`` paths. Only used in ``'embedded'`` mode
            and only when greater than 1.
        mode: one of ``'gaussian'``, ``'embedded'``, ``'dot'`` or
            ``'concatenate'`` (the last one is not implemented).

    Returns:
        The tensor ``add([ip, y])``.

    Raises:
        ValueError: if ``mode`` is unknown or ``ip`` has an unsupported rank.
        NotImplementedError: if ``mode`` is ``'concatenate'``.
    """
    channel_dim = 1 if K.image_data_format() == 'channels_first' else -1
    ip_shape = K.int_shape(ip)

    if mode not in ['gaussian', 'embedded', 'dot', 'concatenate']:
        raise ValueError('`mode` must be one of `gaussian`, `embedded`, `dot` or `concatenate`')

    dim1, dim2, dim3 = None, None, None

    if len(ip_shape) == 3:  # time series data
        rank = 3
        batchsize, dim1, channels = ip_shape

    elif len(ip_shape) == 4:  # image data
        rank = 4
        if channel_dim == 1:
            batchsize, channels, dim1, dim2 = ip_shape
        else:
            batchsize, dim1, dim2, channels = ip_shape

    elif len(ip_shape) == 5:  # Video / Voxel data
        rank = 5
        if channel_dim == 1:
            batchsize, channels, dim1, dim2, dim3 = ip_shape
        else:
            batchsize, dim1, dim2, dim3, channels = ip_shape

    else:
        raise ValueError('Input dimension has to be either 3 (temporal), 4 (spatial) or 5 (spatio-temporal)')

    if mode == 'gaussian':  # Gaussian instantiation
        x1 = Reshape((-1, channels))(ip)  # xi
        x2 = Reshape((-1, channels))(ip)  # xj
        f = dot([x1, x2], axes=2)
        f = Activation('softmax')(f)

    elif mode == 'dot':  # Dot instantiation
        # theta path
        theta = _convND(ip, rank, channels // 2)
        theta = Reshape((-1, channels // 2))(theta)

        # phi path
        phi = _convND(ip, rank, channels // 2)
        phi = Reshape((-1, channels // 2))(phi)

        f = dot([theta, phi], axes=2)

        # scale the values to make it size invariant
        # NOTE(review): the scale is the static batch size, with 128 as the
        # fallback when it is unknown -- confirm this is the intended
        # normaliser.
        if batchsize is not None:
            f = Lambda(lambda z: 1. / batchsize * z)(f)
        else:
            f = Lambda(lambda z: 1. / 128 * z)(f)

    elif mode == 'concatenate':  # Concatenation instantiation
        # `NotImplemented` is a comparison sentinel, not an exception class;
        # calling it raises TypeError. `NotImplementedError` is the
        # exception meant here.
        raise NotImplementedError('Concatenation mode has not been implemented yet')

    else:  # Embedded Gaussian instantiation
        # theta path
        theta = _convND(ip, rank, channels // 2)
        theta = Reshape((-1, channels // 2))(theta)

        # phi path
        phi = _convND(ip, rank, channels // 2)
        phi = Reshape((-1, channels // 2))(phi)

        if computation_compression > 1:
            # shielded computation
            phi = MaxPool1D(computation_compression)(phi)

        f = dot([theta, phi], axes=2)
        f = Activation('softmax')(f)

    # g path
    g = _convND(ip, rank, channels // 2)
    g = Reshape((-1, channels // 2))(g)

    if computation_compression > 1 and mode == 'embedded':
        # shielded computation; g is pooled the same way as phi so that the
        # contraction below lines up
        g = MaxPool1D(computation_compression)(g)

    # compute output path
    y = dot([f, g], axes=[2, 1])

    # reshape to input tensor format
    if rank == 3:
        y = Reshape((dim1, channels // 2))(y)
    elif rank == 4:
        if channel_dim == -1:
            y = Reshape((dim1, dim2, channels // 2))(y)
        else:
            y = Reshape((channels // 2, dim1, dim2))(y)
    else:
        if channel_dim == -1:
            y = Reshape((dim1, dim2, dim3, channels // 2))(y)
        else:
            y = Reshape((channels // 2, dim1, dim2, dim3))(y)

    # project filters back to the input channel count
    y = _convND(y, rank, channels)

    # residual connection
    residual = add([ip, y])

    return residual
# Final encoder LSTM; the full output sequence is kept for attention.
encoder = LSTM(512, return_sequences=True, unroll=True)(encoder)
# Output at the last time step, used below to initialise the decoder.
encoder_last = encoder[:,-1,:]
print('encoder', encoder)
print('encoder_last', encoder_last)

decoder = Embedding(dict_size, 128, input_length=OUTPUT_LENGTH, mask_zero=True)(decoder_input)
# The same tensor is passed as both initial states of the decoder LSTM.
decoder = LSTM(512, return_sequences=True, unroll=True)(decoder, initial_state=[encoder_last, encoder_last])
print('decoder', decoder)

from keras.layers import Activation, dot, concatenate

# Equation (7) with 'dot' score from Section 3.1 in the paper.
# Note that we reuse Softmax-activation layer instead of writing tensor calculation
attention = dot([decoder, encoder], axes=[2, 2])
attention = Activation('softmax', name='attention')(attention)
print('attention', attention)

# Attention-weighted sum of the encoder outputs.
context = dot([attention, encoder], axes=[2,1])
print('context', context)

decoder_combined_context = concatenate([context, decoder])
print('decoder_combined_context', decoder_combined_context)

# Has another weight + tanh layer as described in equation (5) of the paper
output = TimeDistributed(Dense(512, activation="tanh"))(decoder_combined_context)
output = TimeDistributed(Dense(dict_size, activation="softmax"))(output)
print('output', output)

model = Model(inputs=[encoder_input, dec
def create_models(self):
    """Build the triplet training model and the two single-branch models.

    Image features (2048-d) and text features (``self.bow.nb_most_frequent``-d)
    each pass through their own 256-unit ReLU layer and then through one
    shared 128-unit ReLU layer. Anchor and negative inputs of the same
    modality share all weights.

    Returns:
        Tuple ``(training_model, image_model, text_model)``.
        ``training_model`` takes ``[anchor image, anchor text, negative
        image, negative text]`` and outputs three concatenated normalized
        dot products: anchor image vs. anchor text, anchor image vs.
        negative text, and anchor text vs. negative image. The other two
        models map a single anchor input to its shared-space vector.
    """
    text_width = self.bow.nb_most_frequent

    anchor_image_in = Input(shape=(2048, ))
    anchor_text_in = Input(shape=(text_width, ))
    negative_image_in = Input(shape=(2048, ))
    negative_text_in = Input(shape=(text_width, ))

    # Modality-specific layers followed by the shared latent space.
    image_dense = Dense(256, activation="relu")
    text_dense = Dense(256, activation="relu")
    shared_space = Dense(128, activation="relu")

    def embed_image(tensor):
        return shared_space(image_dense(tensor))

    def embed_text(tensor):
        return shared_space(text_dense(tensor))

    def cosine(left, right):
        # normalize=True L2-normalizes both sides, so the dot product is the
        # cosine similarity (normalize=False would give the raw dot product).
        return dot([left, right], axes=1, normalize=True)

    # Branches
    anchor_image_vec = embed_image(anchor_image_in)
    anchor_text_vec = embed_text(anchor_text_in)
    negative_image_vec = embed_image(negative_image_in)
    negative_text_vec = embed_text(negative_text_in)

    similarities = concatenate([
        cosine(anchor_image_vec, anchor_text_vec),
        cosine(anchor_image_vec, negative_text_vec),
        cosine(anchor_text_vec, negative_image_vec),
    ])

    training_model = Model(
        inputs=[
            anchor_image_in, anchor_text_in, negative_image_in,
            negative_text_in
        ],
        outputs=similarities)
    image_model = Model(inputs=anchor_image_in, outputs=anchor_image_vec)
    text_model = Model(inputs=anchor_text_in, outputs=anchor_text_vec)

    return training_model, image_model, text_model
# Decoder side: embed the target tokens and run them through two LSTMs.
y_emb = embedding(y_in)
y_emb = Dropout(0.5)(y_emb)
h = lstm(y_emb)
# NOTE(review): this dropout output is assigned to y_emb but the next line
# feeds `h` to lstm2, so the dropout appears to have no effect -- confirm
# whether `h = Dropout(0.5)(h)` was intended.
y_emb = Dropout(0.5)(h)
h, _, _ = lstm2(h, initial_state=[h_0, cell_0])
# h, _, _ = CuDNNLSTM(hid_dim, return_sequences=True, return_state=True)(y_emb, initial_state=[h_0, cell_0])
# x = Bidirectional(LSTM(100, return_sequences=True, dropout=0.25, recurrent_dropout=0.1))(x)
h = Activation('tanh')(h)

### Attention ###
# Score: dot product of a dense projection of u_map with the decoder states.
dense_att = Dense(hid_dim)
_u_map = dense_att(u_map)
score = dot([_u_map, h], axes=-1)
permute_att1 = Permute((2, 1))
activation_att = Activation('softmax')
# Swap the two score axes before the softmax, which normalises the last axis.
score = permute_att1(score)
a = activation_att(score)
permute_att2 = Permute((2, 1))
# Context: u_map weighted by the attention weights, then axes swapped back.
context = dot([u_map, a], axes=(1, 2))
context = permute_att2(context)
dense_output1 = Dense(hid_dim)
dense_output2 = Dense(vocab_size)
softmax = Activation('softmax')
# Concatenate decoder states and context along the last axis and project.
h_tilde = Lambda(lambda x: K.concatenate([x[0], x[1]], axis=2))([h, context])
h_tilde = dense_output1(h_tilde)
activation='relu')
conv_2 = Conv2D(num_filters,
                kernel_size=(filter_sizes[2], TOPIC_EMB_DIM),
                padding="valid",
                kernel_initializer='normal',
                activation='relu')
s1 = Bidirectional(LSTM(80))
s2 = Dense(CATEGORY, activation='softmax')
# Layers collected under one name; how the list is used is not visible here.
cls_vars = [c1, t1, f1, o1, s1, s2]

x = seq_emb(seq_input)
x = c1(x)  # reducing dim
x = Dropout(0.05)(x)
wt_emb = topic_emb(psudo_input)
wt_emb = t1(wt_emb)  # reducing dim

# first match layer
# Match the sequence representation against the topic embeddings over their
# last axes.
match = dot([x, wt_emb], axes=(2, 2))
joint_match = add([represent_mu, match])
joint_match = f1(joint_match)
topic_sum = add([joint_match, x])
topic_sum = o1(topic_sum)

# # second match layer
# match = dot([topic_sum, wt_emb], axes=(2, 2))
# joint_match = add([represent_mu, match])
# joint_match = f2(joint_match)
# topic_sum = add([joint_match, x])
# topic_sum = o2(topic_sum)

# # third match layer
# match = dot([topic_sum, wt_emb], axes=(2, 2))
# joint_match = add([represent_mu, match])
# joint_match = f3(joint_match)
# topic_sum = add([joint_match, x])
state_c2 = concatenate([fwd_c2, bck_c2]) # decoder decoder_input = Input(shape=(MAX_OUT_LEN,), name='decoder_input') # english embedding layer and dropout decoder_embed = Embedding(STR_VOCAB, STR_EMBED, mask_zero=True, name='decoder_embed')(decoder_input) decoder_embed = Dropout(DROP_RATE)(decoder_embed) # two-layer LSTM initialized with encoder states decoder_hout1 = LSTM(HIDDEN_SIZE, return_sequences=True, name='decoder_lstm1')(decoder_embed, initial_state=[state_h1, state_c1]) decoder_hout2 = LSTM(HIDDEN_SIZE, return_sequences=True, name='decoder_lstm2')(decoder_hout1, initial_state=[state_h2, state_c2]) # Luong global dot attention # score function from the Luong apper = dot score = dot([decoder_hout2, encoder_hout2], axes=[2, 2], name='attn_dotprod') # turn score to "attention dist." for weighted sum attention = Activation('softmax', name='attn_softmax')(score) # do the attention-weighted sum using dot product context = dot([attention, encoder_hout2], axes=[2, 1], name='cont_dotprod') # 'stacked' the context vector with the decoder guess == 'attention vector' context = concatenate([context, decoder_hout2], name='cont_concats') # activation context = TimeDistributed(Dense(HIDDEN_SIZE*2, activation='tanh'), name='cont_dnstanh')(context) # guess which english letter output = TimeDistributed(Dense(STR_VOCAB, activation='softmax'))(context)
def build_ame_model(input_dim, output_dim, make_expert_fn=None, l2_weight=0.0, num_softmaxes=1, num_units=36,
                    granger_loss_weight=0.03, is_regression=True, fast_compile=True, is_image=True,
                    downsample_factor=4, learning_rate=0.0001, dropout=0.0, attention_dropout=0.2, **kwargs):
    """Build and compile an attentive mixture-of-experts (AME) model.

    The model has three outputs, in this order: the attention-weighted
    combination of the expert outputs (layer "combined"), an identity copy of
    it (layer "granger") and an identity copy of the auxiliary prediction made
    from all hidden states (layer "repeat"). The last two exist so that the
    Granger-causal loss and the shared auxiliary loss can be attached to them.

    Args:
        input_dim: Shape passed to ``Input(shape=...)``; its product is handed
            to the expert builders as the number of input dimensions.
        output_dim: Size of the prediction; also selects binary vs categorical
            cross-entropy when ``is_regression`` is False.
        make_expert_fn: Expert factory, used only when ``fast_compile`` is
            False. Defaults to ``ModelBuilder.build_mlp_expert``.
        l2_weight: L2 factor for the downsampling convolutions and the
            attention regularizers; also forwarded to the unoptimized expert
            builder.
        num_softmaxes: Forwarded to ``MixtureSoftAttention``.
        num_units: Filter count of the first downsampling convolution (doubled
            for each further one). For image inputs it is then overwritten
            with the channel count of the last convolution.
        granger_loss_weight: Weight ``w`` of the Granger-causal loss; the main
            loss gets ``1 - w`` and the auxiliary loss a fixed 1.0.
        is_regression: Selects the losses, metrics, output activation, the way
            expert outputs are combined, and gradient clipping (100 vs 0).
        fast_compile: Use the ``*_optimized`` expert builders instead of the
            ``*_unoptimized`` ones.
        is_image: Apply strided convolutions and create one hidden state per
            remaining spatial position.
        downsample_factor: ``downsample_factor // 2`` stride-2 convolutions
            are applied.
        learning_rate: Forwarded to ``ModelBuilder.compile_model``.
        dropout: If non-zero, a ``Dropout`` follows every downsampling
            convolution.
        attention_dropout: Forwarded to ``MixtureSoftAttention``.
        **kwargs: Forwarded to the unoptimized expert builder only.

    Returns:
        Whatever ``ModelBuilder.compile_model`` returns for the assembled
        model.
    """
    if make_expert_fn is None:
        make_expert_fn = ModelBuilder.build_mlp_expert

    output_activation, output_tf_activation = ModelBuilder.get_output_activation(is_regression, output_dim)

    input_layer, input_num_dimensions = Input(shape=input_dim), int(np.prod(input_dim))
    last_layer = input_layer

    if is_image:
        last_num_units = num_units
        for _ in range(downsample_factor // 2):
            # Apply downsampling convolutions for image data to reduce the number of total experts.
            # This reduces the compilation and training time at the cost of resolution
            # in the attention map.
            last_layer = Conv2D(last_num_units, kernel_size=2, strides=2, activation="elu",
                                kernel_regularizer=L1L2(l2=l2_weight))(last_layer)
            if dropout != 0.0:
                last_layer = Dropout(dropout)(last_layer)
            last_num_units *= 2

        # From here on num_units is the channel count of the downsampled map.
        num_units = K.int_shape(last_layer)[-1]
        # One expert per position of axes 1 and 2 of the downsampled map.
        num_pixel_experts = np.prod(K.int_shape(last_layer)[1:3])
        last_layer = Reshape((num_pixel_experts, num_units))(last_layer)
        # Split into a list with one tensor per position.
        topmost_hidden_states = Lambda(lambda x: tf.unstack(x, axis=1))(last_layer)
    else:
        # The expert builders receive None and return the hidden states.
        topmost_hidden_states = None

    if fast_compile:
        outputs, topmost_hidden_states, extra_trainable_weights1 = \
            ModelBuilder._get_expert_outputs_optimized(input_num_dimensions, last_layer, num_units,
                                                       output_dim, output_tf_activation,
                                                       topmost_hidden_states=topmost_hidden_states)

        all_but_one_auxiliary_outputs, extra_trainable_weights2 = \
            ModelBuilder._get_expert_auxiliary_predictions_optimized(output_dim, output_tf_activation,
                                                                     topmost_hidden_states)
    else:
        outputs, topmost_hidden_states, extra_trainable_weights1 = \
            ModelBuilder._get_expert_outputs_unoptimized(input_num_dimensions, last_layer, make_expert_fn,
                                                         output_dim, output_activation, l2_weight,
                                                         topmost_hidden_states=topmost_hidden_states,
                                                         **kwargs)

        all_but_one_auxiliary_outputs, extra_trainable_weights2 = \
            ModelBuilder._get_expert_auxiliary_predictions_unoptimized(output_dim, output_activation,
                                                                       topmost_hidden_states)

    extra_trainable_weights = extra_trainable_weights1 + extra_trainable_weights2

    # Auxiliary prediction that sees the hidden states of all experts at once.
    all_auxiliary_outputs = concatenate(topmost_hidden_states)
    all_auxiliary_outputs_layer = Dense(output_dim, activation=output_activation, name="all_auxiliary")
    # Extra trainable weights must be added to a trainable layer.
    # See https://stackoverflow.com/questions/46544329/keras-add-external-trainable-variable-to-graph
    all_auxiliary_outputs_layer.trainable_weights.extend(extra_trainable_weights)
    all_auxiliary_outputs = all_auxiliary_outputs_layer(all_auxiliary_outputs)

    # The attention mechanism is fed both the hidden states and the expert outputs.
    combined_hidden_state = concatenate(topmost_hidden_states + outputs)

    attention_weights = MixtureSoftAttention(num_softmaxes=num_softmaxes,
                                             num_independent_attention_mechanisms=len(outputs),
                                             attention_dropout=attention_dropout,
                                             name="mixture_attention_1",
                                             u_regularizer=L1L2(l2=l2_weight),
                                             w_regularizer=L1L2(l2=l2_weight),
                                             activation="tanh",
                                             normalised=True)(combined_hidden_state)

    if is_regression:
        # Flatten expert outputs and attention weights to one value per expert,
        # then combine them with a dot product over the expert axis.
        concatenated_residuals = concatenate(outputs, axis=-1)
        concatenated_residuals = Reshape((len(outputs),))(concatenated_residuals)
        attention_weights = Reshape((len(outputs),), name="soft_attention_1")(attention_weights)
        output = dot([attention_weights, concatenated_residuals], axes=-1, name="combined")
    else:
        # Stack the expert outputs along a new second-to-last axis and let
        # ModelBuilder._attention_dot combine them with the attention weights.
        concatenated_residuals = Lambda(lambda x: K.stack(x, axis=-2))(outputs)
        output = Lambda(ModelBuilder._attention_dot, name="combined")([attention_weights, concatenated_residuals])

    # Identity layers: they only give the extra losses named outputs to attach to.
    granger_output = Lambda(lambda x: x, name="granger")(output)
    repeat_output = Lambda(lambda x: x, name="repeat")(all_auxiliary_outputs)

    if is_regression:
        main_loss = "mse"
        auxiliary_loss = absolute_error_loss
        metrics = {}
    else:
        main_loss = "binary_crossentropy" if output_dim == 1 else "categorical_crossentropy"
        auxiliary_loss = categorical_loss
        metrics = {"combined": "accuracy"}

    # partial objects have no __name__, so one is set explicitly on each loss.
    granger_loss = partial(granger_causal_loss,
                           attention_weights=attention_weights,
                           auxiliary_outputs=all_auxiliary_outputs,
                           all_but_one_auxiliary_outputs=all_but_one_auxiliary_outputs,
                           loss_function=auxiliary_loss)
    granger_loss.__name__ = "granger_causal_loss"

    # We optimise compilation speed by using one shared loss function for all auxiliary outputs.
    repeat_loss = partial(repeat_output_loss,
                          outputs=outputs + all_but_one_auxiliary_outputs + [all_auxiliary_outputs],
                          main_loss=main_loss)
    repeat_loss.__name__ = "repeat_loss"

    extra_losses = [granger_loss, repeat_loss]
    # `outputs` is rebound here: from the list of expert outputs to the list of model outputs.
    outputs = [output, granger_output, repeat_output]

    auxiliary_loss_weight = 1.0
    loss_weights = [(1 - granger_loss_weight), granger_loss_weight, auxiliary_loss_weight]

    model = Model(inputs=input_layer, outputs=outputs)
    return ModelBuilder.compile_model(model,
                                      learning_rate=learning_rate,
                                      main_loss=main_loss,
                                      extra_loss=extra_losses,
                                      loss_weights=loss_weights,
                                      metrics=metrics,
                                      # We found gradient clipping useful to combat exploding gradients
                                      # when using unbounded outputs, e.g. in the regression setting.
                                      gradient_clipping_threshold=100 if is_regression else 0)
def train_model(self):
    """Build, train and persist the attention-based seq2seq training model.

    Reads from ``self``: ``num_encoder_tokens``, ``num_decoder_tokens``,
    ``latent_dim``, ``optimizer``, ``loss``, ``metrics``, ``batch_size``,
    ``num_epochs``, ``weights_file_path``, ``model_name`` and the three
    vectorized arrays ``encoder_input_data``, ``decoder_input_data`` and
    ``decoder_target_data``.

    Side effects:
        * ``self.model`` is set to the compiled training model and
          ``self.history`` to the object returned by ``fit``.
        * The best weights (as judged by the checkpoint callback) are written
          to ``self.weights_file_path``.
        * The training history dict is pickled under ``../data`` and the
          final weights are saved under ``../models``.

    Returns:
        None. The trained model is available as ``self.model``.
    """
    # Encoder: an LSTM that returns the full output sequence (needed by the
    # attention below) as well as its final hidden and cell states.
    encoder_inputs = Input(shape=(None, self.num_encoder_tokens),
                           name='encoder_inputs')
    encoder = LSTM(self.latent_dim,
                   return_sequences=True,
                   return_state=True,
                   name='encoder')
    encoder_outputs, state_h, state_c = encoder(encoder_inputs)
    encoder_states = [state_h, state_c]

    # Decoder: starts from the encoder states and returns full output
    # sequences. Its own returned states are not used by the training model.
    decoder_inputs = Input(shape=(None, self.num_decoder_tokens),
                           name='decoder_inputs')
    decoder = LSTM(self.latent_dim,
                   return_sequences=True,
                   return_state=True,
                   name='decoder')
    decoder_outputs, _, _ = decoder(decoder_inputs,
                                    initial_state=encoder_states)

    # Dot-product attention: score every decoder step against every encoder
    # step, normalise with softmax, then take the weighted sum of the
    # encoder outputs as the context.
    attention = dot([decoder_outputs, encoder_outputs],
                    axes=[2, 2],
                    normalize=False)
    attention = Activation('softmax')(attention)
    context = dot([attention, encoder_outputs], axes=[2, 1])
    decoder_combined_context = Concatenate(axis=-1)(
        [context, decoder_outputs])

    output = TimeDistributed(Dense(64, activation="tanh"))(
        decoder_combined_context)
    decoder_outputs = TimeDistributed(
        Dense(self.num_decoder_tokens, activation="softmax"))(output)

    # NOTE(review): a second softmax Dense is stacked on the softmax above.
    # This looks unintended, but it is left untouched because removing it
    # would change the architecture and the layout of the saved weights.
    decoder_dense = Dense(self.num_decoder_tokens,
                          activation='softmax',
                          name='decoder_dense')
    decoder_outputs = decoder_dense(decoder_outputs)

    # The training model maps `encoder_input_data` & `decoder_input_data`
    # to `decoder_target_data` (teacher forcing).
    self.model = Model([encoder_inputs, decoder_inputs], decoder_outputs)
    # summary() prints the table itself and returns None, so it must not be
    # wrapped in print().
    self.model.summary()

    checkpoint = ModelCheckpoint(filepath=self.weights_file_path,
                                 save_best_only=True,
                                 save_weights_only=True,
                                 verbose=1)
    self.model.compile(optimizer=self.optimizer,
                       loss=self.loss,
                       metrics=self.metrics)

    self.history = self.model.fit(
        [self.encoder_input_data, self.decoder_input_data],
        self.decoder_target_data,
        batch_size=self.batch_size,
        epochs=self.num_epochs,
        validation_split=0.1,
        callbacks=[checkpoint])

    with open(f'../data/trainHistoryDict_{self.model_name}.pkl',
              'wb') as file_pi:
        pickle.dump(self.history.history, file_pi)

    self.model.save_weights(
        f'../models/{self.model_name}_final_weights.h5')
dtype="float32") # 埋め込みレイヤーを作成する。各入力値共通で学習する。 shared_embedding_layer = Embedding(input_dim=(G.vocab_size + 3), output_dim=G.embedding_dimension, weights=[embedding]) word_embedding = shared_embedding_layer(word_index) context_embeddings = shared_embedding_layer(context) negative_words_embedding = shared_embedding_layer(negative_samples) # 周辺単語を平均化し、埋め込みレイヤーのベクトルを取得する。 cbow = Lambda(lambda x: K.mean(x, axis=1), output_shape=(G.embedding_dimension, ))(context_embeddings) # 周辺単語のベクトルと、中心単語・ネガティブサンプルの単語の内積を求める。 word_context_product = dot([word_embedding, cbow], axes=-1) negative_context_product = dot([negative_words_embedding, cbow], axes=-1) # 上記で算出した内積がモデルの出力となる。 model = Model(input=[word_index, context, negative_samples], output=[word_context_product, negative_context_product]) # モデルのオプティマイザとしてRMSprop、損失関数としてバイナリ交差エントロピーを使用する。 model.compile(optimizer='rmsprop', loss='binary_crossentropy') print(model.summary()) # 訓練を開始する。 print("===== Start training.") model.fit_generator(V_gen.data_generator(train_data, batch_size), epochs=epoch, shuffle=True)
def pointnet_joint(number_of_points, number_of_classes, number_of_parts):
    """
    Joint classification and part-segmentation PointNet.

    Builds an (uncompiled) Keras model with two inputs and two outputs.

    # Parameters:
        - number_of_points: integer
            Number of points in the pointcloud sampled from each object.
        - number_of_classes: integer
            Number of object classes.
        - number_of_parts: integer
            Number of segmented object parts.

    Model inputs:  pointcloud, shape (B, number_of_points, 3, 1),
                   labels, shape (B, 1); cast to int32 and one-hot encoded
                   inside the model.
    Model outputs: class predictions, shape (B, number_of_classes),
                   segmentation predictions, shape
                   (B, number_of_points, number_of_parts)
    """

    def conv_bn_relu(tensor, filters, kernel_size, prefix):
        # Conv2D without activation -> batch norm -> ReLU, named after `prefix`.
        tensor = Conv2D(filters, kernel_size=kernel_size, strides=(1, 1),
                        padding='valid', activation=None,
                        name=prefix)(tensor)
        tensor = BatchNormalization(name=prefix + "_bn")(tensor)
        return Activation("relu", name=prefix + "_relu")(tensor)

    def dense_bn_relu_dropout(tensor, dense_name, dropout_name):
        # Dense(256) without activation -> batch norm -> ReLU -> Dropout(0.3).
        tensor = Dense(256, activation=None, name=dense_name)(tensor)
        tensor = BatchNormalization(name=dense_name + "_bn")(tensor)
        tensor = Activation("relu", name=dense_name + "_relu")(tensor)
        return Dropout(0.3, name=dropout_name)(tensor)

    inputs = Input((number_of_points, 3, 1,), name="point_cloud_input")
    labels = Input((1,), name="labels_input")
    labels_one_hot = Lambda(
        lambda x: K.one_hot(K.cast(x, 'int32'), number_of_classes),
        name="labels_to_onehot")(labels)

    # Input transform: multiply the point coordinates by the matrix that
    # t_net predicts for the raw cloud.
    transform_matrix_inp = t_net(inputs, part="input")
    flat_points = Reshape((number_of_points, 3),
                          name="point_cloud_dropdim")(inputs)
    point_cloud_transformed = dot([flat_points, transform_matrix_inp],
                                  axes=[2, 1])
    input_image = Reshape((number_of_points, 3, 1),
                          name="point_cloud_transf_adddim")(point_cloud_transformed)

    # Per-point feature extraction.
    out1 = conv_bn_relu(input_image, 64, (1, 3), "conv1")
    out2 = conv_bn_relu(out1, 128, (1, 1), "conv2")
    out3 = conv_bn_relu(out2, 128, (1, 1), "conv3")

    # Feature transform: same idea as the input transform, applied to the
    # 128 feature channels.
    transform_matrix_feat = t_net(out3, part="feature")
    flat_features = Reshape((number_of_points, 128),
                            name="feat_dropdim")(out3)
    net_transformed = dot([flat_features, transform_matrix_feat],
                          axes=[2, 1])
    net_transformed = Reshape((number_of_points, 1, 128),
                              name="feat_transf_adddim")(net_transformed)
    out_feat = net_transformed

    out4 = conv_bn_relu(net_transformed, 512, (1, 1), "conv4")
    net = conv_bn_relu(out4, 2048, (1, 1), "conv5")

    # Global feature: max over all points.
    out_max = MaxPooling2D(pool_size=(number_of_points, 1), strides=(2, 2),
                           padding='valid', name="maxpool")(net)
    out_max_rshp = Reshape((2048,), name="maxpool_dropdims")(out_max)

    # CLASSIFICATION PART
    # (the dropout layers were removed in the original part_seg net architecture)
    net = dense_bn_relu_dropout(out_max_rshp, "cla_fc1", "cla_dp1")
    net = dense_bn_relu_dropout(net, "cla_fc2", "cla_dp2")
    cla_output = Dense(number_of_classes, activation="softmax",
                       name="cla_output")(net)

    # SEGMENTATION PART
    # Append the one-hot label to the global feature, repeat it for every
    # point and concatenate it with the per-point features.
    labels_one_hot_rshp = Reshape((1, 1, number_of_classes),
                                  name="labels_reshape")(labels_one_hot)
    out_max2 = concatenate([out_max, labels_one_hot_rshp], axis=-1)
    expand = Lambda(
        lambda x: K.tile(x, [1, number_of_points, 1, 1]))(out_max2)
    concat = concatenate([out1, out2, out3, out_feat, out4, expand],
                         axis=-1)

    net2 = conv_bn_relu(concat, 256, (1, 1), "seg_conv1")
    net2 = Dropout(0.2, name="seg_dp1")(net2)
    net2 = conv_bn_relu(net2, 256, (1, 1), "seg_conv2")
    net2 = Dropout(0.2, name="seg_dp2")(net2)
    net2 = conv_bn_relu(net2, 128, (1, 1), "seg_conv3")
    net2 = Conv2D(number_of_parts, kernel_size=(1, 1), strides=(1, 1),
                  padding='valid', activation="softmax",
                  name="seg_conv4")(net2)
    seg_output = Reshape((number_of_points, number_of_parts),
                         name="seg_output")(net2)

    return Model(inputs=[inputs, labels], outputs=[cla_output, seg_output])
def buildModelArch(self, vocab_size, query_maxlen, input_sequence, question):
    """Build and compile the memory-network style question-answering model.

    Args:
        vocab_size: Vocabulary size; used as the input dimension of all
            embeddings and as the size of the output distribution.
        query_maxlen: Question length; also the output dimension of the
            second story embedding so that it can be added to the match
            matrix.
        input_sequence: Keras input tensor holding the story word indices.
        question: Keras input tensor holding the question word indices.

    Reads ``self.lstm_size`` for the width of the answer LSTM.

    Returns:
        The compiled Keras model mapping ``[input_sequence, question]`` to a
        softmax distribution over the vocabulary.
    """
    # Encoders.
    # Embed the story into a sequence of 64-dimensional vectors.
    # output: (samples, story_maxlen, embedding_dim)
    input_encoder_m = Sequential()
    input_encoder_m.add(Embedding(input_dim=vocab_size, output_dim=64))
    input_encoder_m.add(Dropout(0.3))

    # Embed the story into a sequence of vectors of size query_maxlen.
    # output: (samples, story_maxlen, query_maxlen)
    input_encoder_c = Sequential()
    input_encoder_c.add(
        Embedding(input_dim=vocab_size, output_dim=query_maxlen))
    input_encoder_c.add(Dropout(0.3))

    # Embed the question into a sequence of 64-dimensional vectors.
    # output: (samples, query_maxlen, embedding_dim)
    question_encoder = Sequential()
    question_encoder.add(
        Embedding(input_dim=vocab_size,
                  output_dim=64,
                  input_length=query_maxlen))
    question_encoder.add(Dropout(0.3))

    # Encode the story and the question (both are word indices) into
    # sequences of dense vectors.
    input_encoded_m = input_encoder_m(input_sequence)
    print('Input encoded m', input_encoded_m)
    input_encoded_c = input_encoder_c(input_sequence)
    print('Input encoded c', input_encoded_c)
    question_encoded = question_encoder(question)
    print('Question encoded', question_encoded)

    # 'Match' between the first story encoding and the question encoding.
    # shape: (samples, story_maxlen, query_maxlen)
    match_scores = dot([input_encoded_m, question_encoded], axes=(2, 2))
    print(match_scores.shape)
    match_scores = Activation('softmax')(match_scores)
    print('Match shape', match_scores)

    # Add the match matrix to the second story encoding, then swap the two
    # sequence axes.
    response = add([match_scores, input_encoded_c])  # (samples, story_maxlen, query_maxlen)
    response = Permute((2, 1))(response)  # (samples, query_maxlen, story_maxlen)
    print('Response shape', response)

    # Concatenate the response with the question vector sequence.
    answer = concatenate([response, question_encoded])
    print('Answer shape', answer)

    # Reduce the sequence with an LSTM, regularise, and project onto the
    # vocabulary.
    answer = LSTM(self.lstm_size)(answer)  # (samples, lstm_size)
    answer = Dropout(0.3)(answer)
    answer = Dense(vocab_size)(answer)  # (samples, vocab_size)
    # Output a probability distribution over the vocabulary.
    answer = Activation('softmax')(answer)

    # Build the final model.
    model = Model([input_sequence, question], answer)
    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    print("-------------Model Summary------------")
    # summary() prints the table itself and returns None, so it must not be
    # wrapped in print().
    model.summary()

    return model
# Run the encoder. Its full output sequence is kept because the attention
# below scores every decoder step against every encoder step.
encoder_outputs = encoder(encoder_inputs)

# The last encoder time step is used as both the initial hidden state and
# the initial cell state of the decoder.
encoder_last = encoder_outputs[:, -1, :]

# Decoder input: one token index per time step.
decoder_inputs = Input(shape=(None, 1))

# Embed the decoder tokens and drop the singleton axis added by the input
# shape.
# NOTE(review): the decoder length is hard-coded to 35 here - confirm it
# matches the length of the decoder input data.
decoder = Embedding(len(dictionary), latent_dim)(decoder_inputs)
decoder = Reshape((35, latent_dim))(decoder)

# The decoder LSTM returns its full output sequence.
decoder_lstm = LSTM(latent_dim, return_sequences=True)
decoder_outputs = decoder_lstm(decoder,
                               initial_state=[encoder_last, encoder_last])

# Dot-product attention: scores -> softmax weights -> weighted sum of the
# encoder outputs (the context).
attention0 = dot([decoder_outputs, encoder_outputs], axes=[2, 2])
attention = Activation('softmax')(attention0)
context = dot([attention, encoder_outputs], axes=[2, 1])
decoder_combined_context = concatenate([context, decoder_outputs])

# Has another weight + tanh layer as described in equation (5) of the paper.
output = TimeDistributed(Dense(latent_dim, activation="tanh"))(decoder_combined_context)
output = TimeDistributed(Dense(num_decoder_tokens, activation="softmax"))(output)

# Define the model that will turn
# `encoder_input_data` & `decoder_input_data` into `decoder_target_data`.
model = Model([encoder_inputs, decoder_inputs], output)

# summary() prints the table itself and returns None, so it must not be
# wrapped in print().
model.summary()

model.compile(loss='categorical_crossentropy', optimizer='adam')
def create_model(self, num_inputs, num_outputs):
    """Build and compile the network selected by ``self.model_type``.

    * ``'mlp'``: a ``Sequential`` model with one hidden tanh layer of 64
      units and a linear output layer of ``num_outputs`` units.
    * ``'cnn'``: a functional model over a single-channel grid of shape
      ``self.grid_shape``. The per-action outputs are reduced to one value
      by a dot product with the ``mask`` input. ``num_inputs`` and
      ``num_outputs`` are not used by this branch.

    Args:
        num_inputs: Input feature count of the MLP.
        num_outputs: Output unit count of the MLP.

    Returns:
        The compiled Keras model.

    Raises:
        NotImplementedError: If ``self.model_type`` is neither ``'mlp'`` nor
            ``'cnn'``.
    """

    def make_initializer():
        # Every weight tensor gets its own freshly seeded, tightly
        # concentrated truncated-normal initializer.
        seed = np.random.randint(2**32)
        return keras.initializers.TruncatedNormal(mean=0.0,
                                                  stddev=0.001,
                                                  seed=seed)

    if self.model_type == 'mlp':
        model = Sequential()
        model.add(
            Dense(64,
                  activation='tanh',
                  input_shape=(num_inputs, ),
                  kernel_initializer=make_initializer(),
                  bias_initializer=make_initializer()))
        model.add(
            Dense(num_outputs,
                  activation='linear',
                  kernel_initializer=make_initializer(),
                  bias_initializer=make_initializer()))
        model.compile(loss='mean_squared_error',
                      optimizer='Adam',
                      metrics=['accuracy'])
    elif self.model_type == 'cnn':
        # Inputs: the grid (a single channel), the action mask, and the
        # holes/goals neighbourhood vector.
        inp = Input(shape=(self.grid_shape[0], self.grid_shape[1], 1),
                    name='grid')
        actions = Input(shape=(self.dim_of_actions, ), name='mask')
        neighbors = Input(shape=(2 * self.dim_of_actions, ),
                          name='holes_and_goals')

        # This draw is not used for anything; it is kept so that the
        # sequence of seeds handed to the initializers below is the same as
        # before for a given numpy RNG state.
        np.random.randint(2**32)

        # Grid feature extraction.
        conv1 = Conv2D(16,
                       kernel_size=2,
                       activation='elu',
                       padding='SAME',
                       data_format='channels_last',
                       kernel_initializer=make_initializer(),
                       bias_initializer=make_initializer())(inp)
        flat1 = Flatten()(conv1)

        # Interpret the grid features: one linear output per action.
        hidden1 = Dense(10,
                        activation='elu',
                        kernel_initializer=make_initializer(),
                        bias_initializer=make_initializer())(flat1)
        hidden2 = Dense(self.dim_of_actions,
                        activation='linear',
                        kernel_initializer=make_initializer(),
                        bias_initializer=make_initializer())(hidden1)

        # Reduce the per-action outputs to one value via the mask.
        output = dot([hidden2, actions], 1)

        # `neighbors` is declared as a model input but does not feed the
        # output; callers still have to supply it.
        model = KerasModel(inputs=[inp, neighbors, actions], outputs=output)
        model.compile(loss='mean_squared_error',
                      optimizer='Adam',
                      metrics=['accuracy'])
    else:
        # `NotImplemented` is a constant, not an exception class; raising it
        # fails with a TypeError in Python 3.
        raise NotImplementedError(
            "Unknown model_type: %r" % (self.model_type, ))

    return model
def generate_model(self):
    """
    Model for RNN with Encoder Decoder for S2S with attention

    -------------
    json config:

    "arch": {
        "neurons":32,
        "k_reg": "None",
        "k_regw": 0.1,
        "rec_reg": "None",
        "rec_regw": 0.1,
        "drop": 0.3,
        "nlayersE": 1,
        "nlayersD": 1,
        "activation": "relu",
        "activation_r": "hard_sigmoid",
        "CuDNN": false,
        "rnn": "GRU",
        "full": [64, 32],
        "mode": "RNN_ED_s2s_att"
    }

    Besides the keys above, this method reads ``arch["activation_fl"]`` (the
    activation of the fully connected layers), ``config["idimensions"]``
    and ``self.runconfig.impl``.

    The built model takes ``[encoder_input, decoder_input]`` (the second
    input is for teacher forcing) and emits one linear value per decoder
    step. It is stored in ``self.model``.

    :return: None
    """
    arch = self.config['arch']
    neurons = arch['neurons']
    drop = arch['drop']
    nlayersE = arch['nlayersE']  # >= 1
    nlayersD = arch['nlayersD']  # >= 1

    activation = arch['activation']
    activation_r = arch['activation_r']
    activation_fl = arch['activation_fl']
    rnntype = arch['rnn']
    full_layers = arch['full']

    # Extra added from training function
    idimensions = self.config['idimensions']
    impl = self.runconfig.impl

    def make_regularizer(kind, weight):
        # Map the config string to a Keras regularizer; anything other than
        # 'l1'/'l2' (e.g. "None") means no regularization.
        if kind == 'l1':
            return l1(weight)
        if kind == 'l2':
            return l2(weight)
        return None

    rec_regularizer = make_regularizer(arch['rec_reg'], arch['rec_regw'])
    # The kernel regularizer is selected by `k_reg`. (It used to test
    # `rec_reg` for the 'l2' case, so the wrong setting decided it.)
    k_regularizer = make_regularizer(arch['k_reg'], arch['k_regw'])

    RNN = LSTM if rnntype == 'LSTM' else GRU

    def rnn_layer():
        # Every encoder/decoder layer shares the same hyper-parameters and
        # returns the full sequence, which the attention below needs.
        return RNN(neurons,
                   implementation=impl,
                   recurrent_dropout=drop,
                   activation=activation,
                   recurrent_activation=activation_r,
                   recurrent_regularizer=rec_regularizer,
                   return_sequences=True,
                   kernel_regularizer=k_regularizer)

    # Encoder RNN - First Input
    # NOTE(review): idimensions[0] is passed as the shape itself, so it is
    # presumably a tuple - confirm against the training function.
    enc_input = Input(shape=idimensions[0])
    encoder = enc_input
    # At least one layer is always built, also when nlayersE < 1.
    for _ in range(max(1, nlayersE)):
        encoder = rnn_layer()(encoder)

    # Decoder RNN - Second input (Teacher Forcing)
    dec_input = Input(shape=(None, 1))
    decoder = dec_input
    for _ in range(max(1, nlayersD)):
        decoder = rnn_layer()(decoder)

    # Attention Layer: dot-product scores -> softmax weights -> weighted sum
    # of the encoder outputs.
    attention = dot([decoder, encoder], axes=[2, 2])
    attention = Activation('softmax', name='attention')(attention)

    context = dot([attention, encoder], axes=[2, 1])
    decoder_combined_context = concatenate([context, decoder])

    # Fully connected head, applied independently at every decoder step.
    output = decoder_combined_context
    for units in full_layers:
        output = TimeDistributed(
            Dense(units, activation=activation_fl))(output)

    output = TimeDistributed(Dense(1, activation="linear"))(output)

    self.model = Model(inputs=[enc_input, dec_input], outputs=output)
# Protein branch: dropout on the raw features, then a 32-unit dense layer.
# AlphaDropout is used together with 'selu', plain Dropout otherwise.
protTensor = Input(shape=(protTrainIN.shape[1],), name='FastProt')
x1 = (layers.AlphaDropout if activationFunc == 'selu'
      else layers.Dropout)(dropoutRate)(protTensor)
x1 = layers.Dense(
    units=32,
    activation=activationFunc,
    kernel_initializer=myInitializer,
    kernel_regularizer=regularizers.l1_l2(l1=0, l2=0.01),
)(x1)

# RNA branch: same structure as the protein branch.
rnaTensor = Input(shape=(rnaTrainIN.shape[1],), name='FastRNA')
x2 = (layers.AlphaDropout if activationFunc == 'selu'
      else layers.Dropout)(dropoutRate)(rnaTensor)
x2 = layers.Dense(
    units=32,
    activation=activationFunc,
    kernel_initializer=myInitializer,
    kernel_regularizer=regularizers.l1_l2(l1=0, l2=0.01),
)(x2)

# Merge the two branches with a dot product over the last axis and map the
# result to a single similarity value.
merged = layers.dot([x1, x2], -1)
similarity = layers.Dense(
    units=1,
    kernel_regularizer=regularizers.l1_l2(l1=l1weight, l2=l2weight),
)(merged)

network1 = models.Model([protTensor, rnaTensor], similarity)
network1.compile(
    optimizer=myOptimizer,
    loss=myLoss,
    metrics=[correlation_coefficient_loss],
)

# Callbacks: keep the best checkpoint, halve the learning rate on plateaus,
# stop early, and log to TensorBoard. The first three watch val_loss.
callbacksList = [
    ModelCheckpoint(filepath=checkPtFile, verbose=1,
                    monitor="val_loss", save_best_only=True),
    ReduceLROnPlateau(monitor='val_loss', factor=0.5,
                      patience=plateauPatience, min_lr=0.000001),
    EarlyStopping(monitor="val_loss", patience=earlyStopPatience),
    TensorBoard(tensorBoardDir, histogram_freq=0, embeddings_freq=0),
]

#############
## fit model
#############
## no-generator version: 10% of the training data is held out for validation
history = network1.fit(
    [protTrainIN, rnaTrainIN],
    similarityTrainIN,
    batch_size=batchSize,
    epochs=epochsNum,
    verbose=2,
    callbacks=callbacksList,
    validation_split=0.1,
    shuffle=True,
)
#############
def create_siamese_lstm_dssm_mdoel(embedding_matrix, embedding_word_matrix, model_param,
                                   embedding_size=300, max_sentence_length=20, max_word_length=25):
    """Build and compile the siamese sentence-pair matching model.

    The model has three parts:

    1. A branch over the first pair of inputs (length
       ``max_sentence_length``): shared embedding, BiLSTM, attention, a
       two-layer LSTM, and a small CNN over the pairwise dot-product matrix
       of the first-layer LSTM outputs.
    2. A branch over the second pair of inputs (length ``max_word_length``):
       shared embedding, BiLSTM and attention.
    3. A head that concatenates interaction features of both branches, the
       distance layers and the CNN features, ending in one sigmoid unit.

    Args:
        embedding_matrix: Initial weights of the first embedding; its length
            is the vocabulary size.
        embedding_word_matrix: Initial weights of the second embedding.
        model_param: Mapping; ``model_param['lstm_units']`` sets the width
            of the LSTMs in the first branch.
        embedding_size: Output dimension of both embeddings.
        max_sentence_length: Length of the first pair of inputs.
        max_word_length: Length of the second pair of inputs.

    Returns:
        The compiled Keras model with inputs
        ``[left_input, right_input, left_w_input, right_w_input]``.
    """
    # Specs of the extra conv/pool stages stacked on the interaction CNN;
    # only the first `num_conv2d_layers` entries are used.
    num_conv2d_layers = 1
    filters_2d = [6, 12]
    kernel_size_2d = [[3, 3], [3, 3]]
    mpool_size_2d = [[2, 2], [2, 2]]
    conv_specs = list(zip(filters_2d, kernel_size_2d, mpool_size_2d))[:num_conv2d_layers]

    def interaction_features(left, right):
        # Element-wise product, absolute difference and max of squares.
        product = Multiply()([left, right])
        abs_diff = Lambda(lambda x: K.abs(x))(Subtract()([left, right]))
        max_square = Maximum()([Multiply()([left, left]), Multiply()([right, right])])
        return product, abs_diff, max_square

    # ---- Part 1: first input pair ----
    # Step 1: define the model inputs.
    left_input = Input(shape=(max_sentence_length,), dtype='int32')
    right_input = Input(shape=(max_sentence_length,), dtype='int32')

    # Define the layers shared by both sides.
    embedding_layer1 = Embedding(
        input_dim=len(embedding_matrix),
        output_dim=embedding_size,
        weights=[embedding_matrix],
        trainable=True,
        input_length=max_sentence_length
    )
    # NOTE(review): 20 equals the default max_sentence_length and is
    # presumably the sequence length the attention layer expects - confirm
    # against AttentionLayer before using a non-default length.
    att_layer1 = AttentionLayer(20)
    bi_lstm_layer = Bidirectional(LSTM(model_param['lstm_units']))
    lstm_layer1 = LSTM(model_param['lstm_units'], return_sequences=True)
    lstm_layer2 = LSTM(model_param['lstm_units'])

    # Assemble the model: both inputs go through the same embedding layer.
    s1 = embedding_layer1(left_input)
    s2 = embedding_layer1(right_input)

    # Bidirectional LSTM on top of the embeddings.
    s1_bi = bi_lstm_layer(s1)
    s2_bi = bi_lstm_layer(s2)

    # Two-layer LSTM on top of the embeddings. The first-layer sequence
    # output is computed once per side and reused for the interaction
    # matrix below.
    s1_lstm = lstm_layer1(s1)
    s2_lstm = lstm_layer1(s2)
    s1_lstm_lstm = lstm_layer2(s1_lstm)
    s2_lstm_lstm = lstm_layer2(s2_lstm)

    # Pairwise dot products between all time steps of the two sides give a
    # (length, length) matrix; a channel axis is added so it can be treated
    # as an image. The size follows `max_sentence_length` rather than a
    # hard-coded 20, so non-default lengths no longer break the Reshape.
    cnn_input_layer = dot([s1_lstm, s2_lstm], axes=-1)
    cnn_input_layer_dot = Reshape(
        (max_sentence_length, max_sentence_length, -1))(cnn_input_layer)
    layer_conv1 = Conv2D(filters=8, kernel_size=3, padding='same',
                         activation='relu')(cnn_input_layer_dot)
    z = MaxPooling2D(pool_size=(2, 2))(layer_conv1)

    for filters, kernel_size, pool_size in conv_specs:
        z = Conv2D(filters=filters, kernel_size=kernel_size,
                   padding='same', activation='relu')(z)
        z = MaxPooling2D(pool_size=(pool_size[0], pool_size[1]))(z)

    pool1_flat = Flatten()(z)
    pool1_flat_drop = Dropout(rate=0.1)(pool1_flat)
    ccn1 = Dense(32, activation='relu')(pool1_flat_drop)
    ccn2 = Dense(16, activation='relu')(ccn1)

    # Attention on top of the embeddings.
    s1_att = att_layer1(s1)
    s2_att = att_layer1(s2)

    # Combine the attention output with the BiLSTM output.
    s1_last = Concatenate(axis=1)([s1_att, s1_bi])
    s2_last = Concatenate(axis=1)([s2_att, s2_bi])

    cos_layer = ConsDist()([s1_last, s2_last])
    man_layer = ManDist()([s1_last, s2_last])

    # ---- Part 2: second input pair ----
    left_w_input = Input(shape=(max_word_length,), dtype='int32')
    right_w_input = Input(shape=(max_word_length,), dtype='int32')

    # Define the layers shared by both sides.
    embedding_layer2 = Embedding(
        input_dim=len(embedding_word_matrix),
        output_dim=embedding_size,
        weights=[embedding_word_matrix],
        trainable=True,
        input_length=max_word_length
    )
    lstm_word_bi_layer = Bidirectional(LSTM(6))
    # NOTE(review): 25 equals the default max_word_length; see the note on
    # att_layer1.
    att_layer2 = AttentionLayer(25)

    s1_words = embedding_layer2(left_w_input)
    s2_words = embedding_layer2(right_w_input)

    s1_words_bi = lstm_word_bi_layer(s1_words)
    s2_words_bi = lstm_word_bi_layer(s2_words)

    s1_words_att = att_layer2(s1_words)
    s2_words_att = att_layer2(s2_words)

    s1_words_last = Concatenate(axis=1)([s1_words_att, s1_words_bi])
    s2_words_last = Concatenate(axis=1)([s2_words_att, s2_words_bi])

    cos_layer1 = ConsDist()([s1_words_last, s2_words_last])
    man_layer1 = ManDist()([s1_words_last, s2_words_last])

    # ---- Part 3: combine the two branches ----
    s1_s2_mul, s1_s2_sub, s1_s2_maxium = interaction_features(s1_last, s2_last)
    s1_s2_sub1 = Lambda(lambda x: K.abs(x))(Subtract()([s1_lstm_lstm, s2_lstm_lstm]))
    (s1_words_s2_words_mul,
     s1_words_s2_words_sub,
     s1_words_s2_words_maxium) = interaction_features(s1_words_last, s2_words_last)

    last_list_layer = Concatenate(axis=1)([
        s1_s2_mul, s1_s2_sub, s1_s2_sub1, s1_s2_maxium,
        s1_words_s2_words_mul, s1_words_s2_words_sub, s1_words_s2_words_maxium,
    ])
    last_list_layer = Dropout(0.05)(last_list_layer)

    # Dense layers.
    dense_layer1 = Dense(32, activation='relu')(last_list_layer)
    dense_layer2 = Dense(48, activation='sigmoid')(last_list_layer)

    output_layer = Concatenate(axis=1)([
        dense_layer1, dense_layer2,
        cos_layer, man_layer, cos_layer1, man_layer1,
        ccn2,
    ])

    # Step 4: define the output layer.
    output_layer = Dense(1, activation='sigmoid')(output_layer)

    model = Model(
        inputs=[left_input, right_input, left_w_input, right_w_input],
        outputs=[output_layer],
        name="simaese_lstm_attention"
    )
    model.compile(
        # alternatives tried: categorical_crossentropy, contrastive_loss
        loss='binary_crossentropy',
        optimizer='adam',
        metrics=["accuracy", fbeta_score, precision, recall]
    )
    return model
# The KL term of the variational dense layer is scaled by 1 / (number of
# batches).
num_batches = train_size / Batch_size
kl_loss_weight = 1.0 / num_batches

# One word index each for the target word and the context word.
input_target = Input((1, ))
input_context = Input((1, ))

# Both words share one variational embedding.
# NOTE(review): this layer uses a fixed kl_loss_weight of 0.5 rather than the
# value computed above, and an embedding size of 100 while the reshapes below
# use `vector_dim` - confirm both are intended.
embedding = EmbeddingVariation(100000,
                               100,
                               kl_loss_weight=0.5,
                               input_length=1,
                               name='embedding')

target = embedding(input_target)
target = Reshape((vector_dim, 1))(target)
context = embedding(input_context)
context = Reshape((vector_dim, 1))(context)

# Normalized similarity between the two embeddings; it is not part of
# `model` below.
similarity = dot([target, context], axes=0, normalize=True)

# Raw dot product of the two embeddings, flattened to a single value and
# passed through a variational sigmoid unit.
dot_product = dot([target, context], axes=1, normalize=False)
dot_product = Reshape((1, ))(dot_product)
output = DenseVariational(1,
                          kl_loss_weight=kl_loss_weight,
                          activation='sigmoid')(dot_product)

# `inputs` / `outputs` are the Keras 2 keyword names; the legacy singular
# forms `input` / `output` are deprecated.
model = Model(inputs=[input_target, input_context], outputs=output)
#%%
'''
x_in = Input(shape=(100,))
x = DenseVariational(20, kl_loss_weight=kl_loss_weight, activation='relu')(x_in)
x = DenseVariational(20, kl_loss_weight=kl_loss_weight, activation='relu')(x)
x = DenseVariational(1, kl_loss_weight=kl_loss_weight)(x)

model = Model(x_in, x)
'''
def test_TensorBoard_multi_input_output(tmpdir):
    """Run the TensorBoard callback against a two-input, two-output model.

    The model is trained four times (``fit`` and ``fit_generator``, each
    with and without validation data).  Afterwards the log directory must
    exist, and removing it must leave ``tmpdir`` empty.
    """
    np.random.seed(np.random.randint(1, 1e7))
    log_dir = str(tmpdir / 'logs')

    (X_train, y_train), (X_test, y_test) = get_data_callbacks(
        input_shape=(input_dim, input_dim))
    y_test = np_utils.to_categorical(y_test)
    y_train = np_utils.to_categorical(y_train)

    # Two identical 2-D inputs are summed, pooled, and recombined.
    first_in = Input((input_dim, input_dim))
    second_in = Input((input_dim, input_dim))
    summed = add([first_in, second_in])
    pooled = GlobalAveragePooling1D()(summed)
    # test a layer with a list of output tensors
    pair = Lambda(lambda x: x)([summed, pooled])
    features = dot(pair, axes=-1)
    features = Dense(num_hidden, activation='relu')(features)
    features = Dropout(0.1)(features)
    first_out = Dense(num_classes, activation='softmax')(features)
    second_out = Dense(num_classes, activation='softmax')(features)

    model = Model(inputs=[first_in, second_in],
                  outputs=[first_out, second_out])
    model.compile(loss='categorical_crossentropy',
                  optimizer='sgd',
                  metrics=['accuracy'])

    # we must generate new callbacks for each test, as they aren't stateless
    def make_callbacks(histogram_freq, embeddings_freq=1):
        board = callbacks.TensorBoard(
            log_dir=log_dir,
            histogram_freq=histogram_freq,
            write_images=True,
            write_grads=True,
            embeddings_freq=embeddings_freq,
            embeddings_layer_names=['dense_1'],
            embeddings_data=[X_test] * 2,
            batch_size=5)
        return [board]

    # Every input/target is duplicated to feed both branches of the model.
    train_inputs = [X_train] * 2
    train_targets = [y_train] * 2
    validation = ([X_test] * 2, [y_test] * 2)

    # fit without validation data
    model.fit(train_inputs, train_targets,
              batch_size=batch_size,
              callbacks=make_callbacks(histogram_freq=0, embeddings_freq=0),
              epochs=3)

    # fit with validation data and accuracy
    model.fit(train_inputs, train_targets,
              batch_size=batch_size,
              validation_data=validation,
              callbacks=make_callbacks(histogram_freq=1),
              epochs=2)

    train_generator = data_generator([X_train] * 2, [y_train] * 2, batch_size)
    steps = len(X_train)

    # fit generator without validation data
    model.fit_generator(train_generator, steps,
                        epochs=2,
                        callbacks=make_callbacks(histogram_freq=0,
                                                 embeddings_freq=0))

    # fit generator with validation data and accuracy
    model.fit_generator(train_generator, steps,
                        epochs=2,
                        validation_data=validation,
                        callbacks=make_callbacks(histogram_freq=1))

    assert os.path.isdir(log_dir)
    shutil.rmtree(log_dir)
    assert not tmpdir.listdir()
def test_TensorBoard_multi_input_output(tmpdir):
    """Run the TensorBoard callback against a two-input, two-output model.

    Data comes from ``get_test_data``; a local generator feeds the
    ``fit_generator`` runs.  The model is trained four times (``fit`` and
    ``fit_generator``, each with and without validation data).  Afterwards
    the log directory must exist, and removing it must leave ``tmpdir``
    empty.
    """
    np.random.seed(np.random.randint(1, 1e7))
    log_dir = str(tmpdir / 'logs')

    (X_train, y_train), (X_test, y_test) = get_test_data(
        num_train=train_samples,
        num_test=test_samples,
        input_shape=(input_dim, input_dim),
        classification=True,
        num_classes=num_classes)
    y_test = np_utils.to_categorical(y_test)
    y_train = np_utils.to_categorical(y_train)

    def data_generator(train):
        # Cycle forever over whole batches of the chosen split.
        features, targets = (X_train, y_train) if train else (X_test, y_test)
        n_batches = len(features) // batch_size
        step = 0
        while True:
            window = slice(step * batch_size, (step + 1) * batch_size)
            # simulate multi-input/output models
            yield ([features[window]] * 2, [targets[window]] * 2)
            step = (step + 1) % n_batches

    # Two identical 2-D inputs are summed, pooled, and recombined.
    first_in = Input((input_dim, input_dim))
    second_in = Input((input_dim, input_dim))
    summed = add([first_in, second_in])
    pooled = GlobalAveragePooling1D()(summed)
    # test a layer with a list of output tensors
    pair = Lambda(lambda x: x)([summed, pooled])
    features = dot(pair, axes=-1)
    features = Dense(num_hidden, activation='relu')(features)
    features = Dropout(0.1)(features)
    first_out = Dense(num_classes, activation='softmax')(features)
    second_out = Dense(num_classes, activation='softmax')(features)

    model = Model(inputs=[first_in, second_in],
                  outputs=[first_out, second_out])
    model.compile(loss='categorical_crossentropy',
                  optimizer='sgd',
                  metrics=['accuracy'])

    # we must generate new callbacks for each test, as they aren't stateless
    def make_callbacks(histogram_freq, embeddings_freq=1):
        board = callbacks.TensorBoard(
            log_dir=log_dir,
            histogram_freq=histogram_freq,
            write_images=True,
            write_grads=True,
            embeddings_freq=embeddings_freq,
            embeddings_layer_names=['dense_1'],
            embeddings_data=[X_test] * 2,
            batch_size=5)
        return [board]

    # Every input/target is duplicated to feed both branches of the model.
    train_inputs = [X_train] * 2
    train_targets = [y_train] * 2
    validation = ([X_test] * 2, [y_test] * 2)
    steps = len(X_train)

    # fit without validation data
    model.fit(train_inputs, train_targets,
              batch_size=batch_size,
              callbacks=make_callbacks(histogram_freq=0, embeddings_freq=0),
              epochs=3)

    # fit with validation data and accuracy
    model.fit(train_inputs, train_targets,
              batch_size=batch_size,
              validation_data=validation,
              callbacks=make_callbacks(histogram_freq=1),
              epochs=2)

    # fit generator without validation data
    model.fit_generator(data_generator(True), steps,
                        epochs=2,
                        callbacks=make_callbacks(histogram_freq=0,
                                                 embeddings_freq=0))

    # fit generator with validation data and accuracy
    model.fit_generator(data_generator(True), steps,
                        epochs=2,
                        validation_data=validation,
                        callbacks=make_callbacks(histogram_freq=1))

    assert os.path.isdir(log_dir)
    shutil.rmtree(log_dir)
    assert not tmpdir.listdir()
#blendW1 = TimeDistributed(Dense(latent_dim)(en_seq) #?,input_seq_length,latent_dim print ('blendW1') print (blendW1) #blendW2 = TimeDistributed(Dense(latent_dim),ouput_dim=1)(dec_seq) blendW2 = TimeDistributed(Dense(latent_dim))(dec_seq) print ('blendW2') print (blendW2) blend3 = tanh(blendW1+blendW2) print ("blend3") print (blend3) #blend3 = K.squeeze(blend3,0) #print ("blend3 squeezed") #print (blend3) U = dot([blend3,vt],(0,1)) print ('U') print (U) U = K.squeeze(U, 0) print ('U squeezed') print (U) # make probability tensor decoder_dense = Dense(num_encoder_tokens, activation='softmax') outputs = decoder_dense(U) print ('outputs') print (outputs) #outputs = K.slice(outputs,(0,0),(max_decoder_seq_length,num_encoder_tokens))(outputs) print ('outputs2')
def build_model(vectors, shape, settings):
    """Build and compile the attend / compare / aggregate sentence-pair model.

    Args:
        vectors: passed to ``create_embedding`` to build the shared
            embedding layer.
        shape: ``(max_length, nr_hidden, nr_class)``.
        settings: mapping that provides ``"entail_dir"`` (``"both"``,
            ``"left"``; any other value selects the remaining direction)
            and ``"lr"`` (learning rate for Adam).

    Returns:
        A compiled Keras ``Model`` that takes the two word-id inputs
        (``words1``, ``words2``) and ends in an ``nr_class``-way softmax.
    """
    max_length, nr_hidden, nr_class = shape

    words1 = layers.Input(shape=(max_length,), dtype="int32", name="words1")
    words2 = layers.Input(shape=(max_length,), dtype="int32", name="words2")

    # embeddings (projected): both sentences share one embedding layer
    embedder = create_embedding(vectors, max_length, nr_hidden)
    sent1 = embedder(words1)
    sent2 = embedder(words2)

    # step 1: attend
    attend_net = create_feedforward(nr_hidden)
    attended1 = attend_net(sent1)
    attended2 = attend_net(sent2)
    scores = layers.dot([attended1, attended2], axes=-1)

    compare_net = create_feedforward(nr_hidden)

    direction = settings["entail_dir"]
    if direction == "both":
        weights1 = layers.Lambda(normalizer(1))(scores)
        weights2 = layers.Lambda(normalizer(2))(scores)
        alpha = layers.dot([weights1, sent1], axes=1)
        beta = layers.dot([weights2, sent2], axes=1)

        # step 2: compare
        paired1 = layers.concatenate([sent1, beta])
        paired2 = layers.concatenate([sent2, alpha])
        compared1 = layers.TimeDistributed(compare_net)(paired1)
        compared2 = layers.TimeDistributed(compare_net)(paired2)

        # step 3: aggregate
        summed1 = layers.Lambda(sum_word)(compared1)
        summed2 = layers.Lambda(sum_word)(compared2)
        aggregated = layers.concatenate([summed1, summed2])
    elif direction == "left":
        weights1 = layers.Lambda(normalizer(1))(scores)
        alpha = layers.dot([weights1, sent1], axes=1)
        paired2 = layers.concatenate([sent2, alpha])
        compared2 = layers.TimeDistributed(compare_net)(paired2)
        aggregated = layers.Lambda(sum_word)(compared2)
    else:
        weights2 = layers.Lambda(normalizer(2))(scores)
        beta = layers.dot([weights2, sent2], axes=1)
        paired1 = layers.concatenate([sent1, beta])
        compared1 = layers.TimeDistributed(compare_net)(paired1)
        aggregated = layers.Lambda(sum_word)(compared1)

    # Final feed-forward block followed by the class softmax.
    output_net = create_feedforward(nr_hidden)
    scores_out = output_net(aggregated)
    scores_out = layers.Dense(nr_class, activation="softmax")(scores_out)

    classifier = Model([words1, words2], scores_out)
    classifier.compile(
        optimizer=optimizers.Adam(lr=settings["lr"]),
        loss="categorical_crossentropy",
        metrics=["accuracy"],
    )
    return classifier
def build_model(embedding_size=512,
                lr=0.01,
                optimizer='adam',
                depth=2,
                hidden=0,
                hidden2=0,
                loss='mse',
                hidden_activation='tanh',
                output_activation='linear',
                dr1=0.0,
                dr2=0.0,
                output_size=1,
                molecular_attributes=False,
                use_fp=None,
                inner_rep=32,
                verbose=False):
    '''Generate a simple embedding model that maps a molecular tensor (or a
    precomputed fingerprint) to an output of size ``output_size``.

    inputs:
        embedding_size - size of the fingerprint produced by the graph layers
        lr - learning rate to use (train_model overwrites this value);
            ignored for 'adadelta', which is built with its defaults
        optimizer - 'adam', 'rmsprop', 'adagrad' or 'adadelta'
        depth - depth of the neural fingerprint (i.e., radius)
        hidden - number of nodes in the first hidden layer after the
            fingerprint (0 connects the fingerprint straight to the output)
        hidden2 - number of nodes in a second hidden layer (only used when
            hidden > 0)
        loss - loss function as a string (e.g., 'mse'); 'custom' and
            'custom2' select mse_no_NaN and binary_crossnetropy_no_NaN
        hidden_activation - activation used in hidden layers and in the
            per-depth atom feature update
        output_activation - activation function for the final output nodes
        dr1 - dropout rate applied after the first hidden layer
        dr2 - dropout rate applied after the second hidden layer
        output_size - number of output nodes
        molecular_attributes - whether to include additional molecular
            attributes in the atom-level features (recommended)
        use_fp - None to build the graph model; any other value makes the
            model take a 512-wide fingerprint as its only input
            (for benchmarking)
        inner_rep - width of the inner per-atom representation
        verbose - print progress messages and the model summary

    outputs:
        model - a compiled Keras model

    An unrecognized optimizer name prints a message and calls quit(1).
    '''
    graph_model = use_fp is None

    # Graph model
    if graph_model:
        F_atom, F_bond = sizeAttributeVectors(
            molecular_attributes=molecular_attributes)
        mat_features = Input(
            shape=(None, F_atom),
            name="feature-matrix")  # shape = (n_atoms, n_atom_features)
        mat_adjacency = Input(
            shape=(None, None),
            name="adjacency/self-matrix")  # shape = (n_atoms, n_atoms)
        mat_specialbondtypes = Input(
            shape=(None, F_bond),
            name="special-bond-types")  # shape = (n_atoms, n_bond_features)

        # Lists to keep track of keras features, indexed by depth
        all_mat_features = [mat_features]
        contribs_by_atom = []
        actual_contribs_for_atoms = []
        actual_bond_contribs_for_atoms = []
        output_contribs_byatom = []
        output_contribs = []
        unactivated_features = []

        sum_across_atoms = lambda x: K.sum(x, axis=1)
        sum_across_atoms_shape = lambda x: (x[0], x[2])

        for d in range(0, depth + 1):
            if verbose:
                print('### DEPTH {}'.format(d))
                print('KERAS SHAPE OF ALL_MAT_FEATURES[d]')
                print(all_mat_features[d]._keras_shape)
                print('K.ndim of all_mat_features')
                print(K.ndim(all_mat_features[d]))

            # Get the output contribution using all_mat_features[d]
            output_contribs_byatom.append(
                TimeDistributed(
                    Dense(embedding_size, activation='softmax'),
                    name='d{}-out'.format(d),
                )(all_mat_features[d]))
            if verbose:
                print('Added depth {} output contribution (still atom-wise)'.
                      format(d))

            output_contribs.append(
                Lambda(sum_across_atoms,
                       output_shape=sum_across_atoms_shape,
                       name="d{}-out-sum-across-atoms".format(d))(
                           output_contribs_byatom[d]))
            if verbose:
                print(
                    'Added depth {} output contribution (summed across atoms)'.
                    format(d))

            # Update if needed: the last depth only contributes to the output
            if d < depth:
                contribs_by_atom.append(
                    TimeDistributed(
                        Dense(inner_rep, activation='linear'),
                        name="d{}-atom-to-atom".format(d),
                    )(all_mat_features[d]))
                if verbose:
                    print('Calculated new atom features for each atom, d {}'.
                          format(d))
                    print('ndim: {}'.format(K.ndim(contribs_by_atom[-1])))

                # NOTE(review): axes=(0, 0) contracts over the first axis of
                # both tensors -- confirm this is the intended contraction.
                actual_contribs_for_atoms.append(
                    dot([mat_adjacency, contribs_by_atom[d]],
                        axes=(0, 0),
                        name="d{}-multiply-atom-contribs-and-adj-mat".format(
                            d)))
                if verbose:
                    print('Multiplied new atom features by adj matrix, d = {}'.
                          format(d))
                    print('ndim: {}'.format(
                        K.ndim(actual_contribs_for_atoms[-1])))

                actual_bond_contribs_for_atoms.append(
                    TimeDistributed(
                        Dense(inner_rep, activation='linear', use_bias=False),
                        name="d{}-get-bond-contributions-to-new-atom-features".
                        format(d),
                    )(mat_specialbondtypes))
                if verbose:
                    print(
                        'Calculated bond effects on new atom features d = {}'.
                        format(d))
                    print('ndim: {}'.format(
                        K.ndim(actual_bond_contribs_for_atoms[-1])))

                unactivated_features.append(
                    add(
                        [
                            actual_contribs_for_atoms[d],
                            actual_bond_contribs_for_atoms[d]
                        ],
                        name=
                        'd{}-combine-atom-and-bond-contributions-to-new-atom-features'
                        .format(d),
                    ))
                if verbose:
                    print(
                        'Calculated summed features, unactivated, for d = {}'.
                        format(d))
                    print('ndim: {}'.format(K.ndim(unactivated_features[-1])))

                all_mat_features.append(
                    Activation(hidden_activation,
                               name="d{}-inner-update-activation".format(d))(
                                   unactivated_features[d]))
                if verbose:
                    print(
                        'Added activation layer for new atom features, d = {}'.
                        format(d))
                    print('ndim: {}'.format(K.ndim(all_mat_features[-1])))

        # The fingerprint is the sum of the per-depth contributions.
        if len(output_contribs) > 1:
            FPs = add(output_contribs, name='pool-across-depths')
        else:
            FPs = output_contribs[0]
    else:
        FPs = Input(shape=(512, ), name="input-fingerprint")

    if hidden > 0:
        h1 = Dense(hidden, activation=hidden_activation)(FPs)
        h1d = Dropout(dr1)(h1)
        if verbose:
            print(' model: added {} Dense layer (-> {})'.format(
                hidden_activation, hidden))
        if hidden2 > 0:
            # Feed the dropped-out activations forward; using `h1` here
            # would silently skip the dr1 dropout whenever hidden2 > 0.
            h2 = Dense(hidden2, activation=hidden_activation)(h1d)
            if verbose:
                print(' model: added {} Dense layer (-> {})'.format(
                    hidden_activation, hidden2))
            h = Dropout(dr2)(h2)
        else:
            h = h1d
    else:
        h = FPs

    ypred = Dense(output_size, activation=output_activation)(h)
    if verbose:
        print(
            ' model: added output Dense layer (-> {})'.format(output_size))

    if graph_model:
        model = Model(
            inputs=[mat_features, mat_adjacency, mat_specialbondtypes],
            outputs=[ypred])
    else:
        model = Model(inputs=[FPs], outputs=[ypred])

    if verbose:
        model.summary()

    # Compile
    if optimizer == 'adam':
        optimizer = Adam(lr=lr)
    elif optimizer == 'rmsprop':
        optimizer = RMSprop(lr=lr)
    elif optimizer == 'adagrad':
        optimizer = Adagrad(lr=lr)
    elif optimizer == 'adadelta':
        optimizer = Adadelta()
    else:
        print('Unrecognized optimizer')
        quit(1)

    # Custom loss to filter out NaN values in multi-task predictions
    if loss == 'custom':
        loss = mse_no_NaN
    elif loss == 'custom2':
        loss = binary_crossnetropy_no_NaN

    if verbose:
        print('compiling...', )
    model.compile(loss=loss, optimizer=optimizer)
    if verbose:
        print('done')

    return model
# Build a two-branch model: both inputs go through the shared `conv_4`
# encoder, an AttentionLayer relates the two encodings, and each branch is
# re-weighted by softmax attention before a euclidean distance is computed.
# NOTE(review): `conv_4`, `AttentionLayer`, `euclidean_distance` and
# `eucl_dist_output_shape` are defined outside this section.

input_1 = Input(shape=(20, 100), dtype='float32')
input_2 = Input(shape=(15, 100), dtype='float32')

out_1 = conv_4(input_1)
out_2 = conv_4(input_2)

# Single-argument print calls produce the same text on Python 2 and 3
# (the former print statements are a SyntaxError on Python 3).
print('out1 shape... {}'.format(K.int_shape(out_1)))
print('out2 shape... {}'.format(K.int_shape(out_2)))

attention = AttentionLayer()([out_1, out_2])

# out_1 column wise
att_1 = GlobalMaxPooling1D()(attention)
att_1 = Activation('softmax')(att_1)
print('attention shape {}'.format(K.int_shape(att_1)))
# Add a trailing axis so the weights can be contracted with `out_2`.
att_1 = Lambda(lambda x: K.expand_dims(x, 2))(att_1)
out1 = dot([att_1, out_2], axes=1)

# out_2 row wise
attention_transposed = Lambda(
    lambda x: K.permute_dimensions(x, (0, 2, 1)))(attention)
att_2 = GlobalMaxPooling1D()(attention_transposed)
att_2 = Activation('softmax')(att_2)
att_2 = Lambda(lambda x: K.expand_dims(x, 2))(att_2)
out2 = dot([att_2, out_1], axes=1)

distance = Lambda(euclidean_distance,
                  output_shape=eucl_dist_output_shape)([out1, out2])

model = Model(inputs=[input_1, input_2], outputs=distance)

# Write a diagram of the model, including tensor shapes, to model.png.
from keras.utils.vis_utils import plot_model
plot_model(model, 'model.png', show_shapes=True)
# Train a skip-gram style model on (target, context) word pairs and save
# the learned embedding matrix.
vocab, couples, labels = process_data(train_filename, window_size)

# generate model
vocab_size = len(vocab)
word_target, word_context = zip(*couples)

input_target = Input((1,))
input_context = Input((1,))

# Target and context words share a single embedding layer.
embedding = Embedding(vocab_size, embedding_size, input_length=1)

target = embedding(input_target)
target = Reshape((embedding_size,))(target)
context = embedding(input_context)
context = Reshape((embedding_size,))(context)

# The dot product of the two embeddings feeds a single sigmoid unit.
dot_product = dot([target, context], 1)
dot_product = Reshape((1,))(dot_product)
output = Dense(1, activation='sigmoid')(dot_product)

# Keras 2 names these keyword arguments `inputs` / `outputs`; the singular
# Keras 1 spellings are deprecated.
model = Model(inputs=[input_target, input_context], outputs=output)
model.summary()
model.compile(loss=loss, optimizer=optimizer, metrics=metrics)

# NOTE(review): steps_per_epoch is set to `batch_size`, not to the number
# of batches in the data -- confirm this is intentional.
model.fit_generator(batch_generator(word_target, word_context, labels,
                                    batch_size),
                    steps_per_epoch=batch_size,
                    epochs=epochs)

# Persist the weight matrix of the shared embedding layer.
save_embedding('skipgram-embedding_labeled.txt',
               embedding.get_weights()[0], vocab)
def model(vocab_size, query_maxlen, story_maxlen):
    """Build the (uncompiled) story/question answering network.

    Args:
        vocab_size: vocabulary size; used as the embedding input dimension
            and as the width of the output distribution.
        query_maxlen: length of the question input.
        story_maxlen: length of the story input.

    Returns:
        A Keras ``Model`` taking ``[story, question]`` index sequences and
        producing a softmax over the vocabulary.
    """

    def embedding_encoder(output_dim, **embedding_options):
        # An embedding followed by 30% dropout, wrapped in a Sequential.
        encoder = Sequential()
        encoder.add(Embedding(input_dim=vocab_size,
                              output_dim=output_dim,
                              **embedding_options))
        encoder.add(Dropout(0.3))
        return encoder

    story_in = Input((story_maxlen, ))
    question_in = Input((query_maxlen, ))

    # encoders
    # story -> (samples, story_maxlen, embedding_dim)
    story_encoder_m = embedding_encoder(64)
    # story -> (samples, story_maxlen, query_maxlen)
    story_encoder_c = embedding_encoder(query_maxlen)
    # question -> (samples, query_maxlen, embedding_dim)
    query_encoder = embedding_encoder(64, input_length=query_maxlen)

    # encode the index sequences into sequences of dense vectors
    story_m = story_encoder_m(story_in)
    story_c = story_encoder_c(story_in)
    query = query_encoder(question_in)

    # 'match' between the first story encoding and the question encoding
    # shape: `(samples, story_maxlen, query_maxlen)`
    scores = dot([story_m, query], axes=(2, 2))
    match = Activation('softmax')(scores)

    # add the match matrix to the second story encoding
    combined = add([match, story_c])  # (samples, story_maxlen, query_maxlen)
    combined = Permute((2, 1))(combined)  # (samples, query_maxlen, story_maxlen)

    # concatenate the result with the question vector sequence
    merged = concatenate([combined, query])

    # the original paper uses a matrix multiplication for this reduction step.
    # we choose to use a RNN instead.
    reduced = LSTM(32)(merged)  # (samples, 32)

    # one regularization layer -- more would probably be needed.
    reduced = Dropout(0.3)(reduced)
    logits = Dense(vocab_size)(reduced)  # (samples, vocab_size)
    # we output a probability distribution over the vocabulary
    probabilities = Activation('softmax')(logits)

    # build the final model
    network = Model([story_in, question_in], probabilities)
    return network
def build_model(char_size=27, dim=64, iterations=4, training=True, ilp=False, pca=False):
    """Build the iterative control / read / write reasoning model.

    Args:
        char_size: number of symbols; size of the fixed one-hot embedding.
        dim: width of the GRU state and of the dense layers.
        iterations: number of reasoning iterations to unroll.
        training: when true, return a compiled single-output model; when
            false, return an uncompiled model that also outputs every
            attention/gate tensor collected in ``outs``.
        ilp: not referenced in this function body.
        pca: not referenced in this function body.

    Returns:
        A Keras ``Model`` taking ``[context, query]``.
    """
    # Inputs
    # Context: (rules, preds, chars,)
    context = L.Input(shape=(None, None, None, ), name='context', dtype='int32')
    query = L.Input(shape=(None, ), name='query', dtype='int32')

    # Flatten preds to embed entire rules
    var_flat = L.Lambda(lambda x: K.reshape(
        x, K.stack([K.shape(x)[0], -1, K.prod(K.shape(x)[2:])])), name='var_flat')
    flat_ctx = var_flat(context)  # (?, rules, preds*chars)

    # Onehot embedding of symbols (identity matrix, not trainable)
    onehot_weights = np.eye(char_size)
    onehot_weights[0, 0] = 0  # Clear zero index
    onehot = L.Embedding(char_size, char_size, trainable=False, weights=[onehot_weights], name='onehot')
    embedded_ctx = onehot(flat_ctx)  # (?, rules, preds*chars*char_size)
    embedded_q = onehot(query)  # (?, chars, char_size)

    # Embed predicates
    embed_pred = ZeroGRU(dim, go_backwards=True, return_sequences=True, return_state=True, name='embed_pred')
    embedded_predqs, embedded_predq = embed_pred(embedded_q)  # (?, chars, dim)
    # Switch the same layer object to single-output mode before it is reused
    # on the rules below; the query call above has already been traced.
    embed_pred.return_sequences = False
    embed_pred.return_state = False
    # Embed every rule
    embedded_rules = L.TimeDistributed(embed_pred, name='rule_embed')(embedded_ctx)  # (?, rules, dim)

    # Reused layers over iterations (created once so weights are shared)
    concatm1 = L.Concatenate(name='concatm1')
    repeat_toqlen = L.RepeatVector(K.shape(embedded_q)[1], name='repeat_toqlen')
    mult_cqi = L.Multiply(name='mult_cqi')
    dense_cqi = L.Dense(dim, name='dense_cqi')
    dense_cais = L.Dense(1, name='dense_cais')
    squeeze2 = L.Lambda(lambda x: K.squeeze(x, 2), name='sequeeze2')
    softmax1 = L.Softmax(axis=1, name='softmax1')
    dot11 = L.Dot((1, 1), name='dot11')
    repeat_toctx = L.RepeatVector(K.shape(context)[1], name='repeat_toctx')
    memory_dense = L.Dense(dim, name='memory_dense')
    kb_dense = L.Dense(dim, name='kb_dense')
    mult_info = L.Multiply(name='mult_info')
    info_dense = L.Dense(dim, name='info_dense')
    mult_att_dense = L.Multiply(name='mult_att_dense')
    read_att_dense = L.Dense(1, name='read_att_dense')
    mem_info_dense = L.Dense(dim, name='mem_info_dense')
    stack1 = L.Lambda(lambda xs: K.stack(xs, 1), output_shape=(None, dim), name='stack1')
    mult_self_att = L.Multiply(name='mult_self_att')
    self_att_dense = L.Dense(1, name='self_att_dense')
    misa_dense = L.Dense(dim, use_bias=False, name='misa_dense')
    mi_info_dense = L.Dense(dim, name='mi_info_dense')
    add_mip = L.Lambda(lambda xy: xy[0] + xy[1], name='add_mip')
    control_gate = L.Dense(1, activation='sigmoid', name='control_gate')
    # Blend: gate * first + (1 - gate) * second, with the gate as third input.
    gate2 = L.Lambda(lambda xyg: xyg[2] * xyg[0] + (1 - xyg[2]) * xyg[1], name='gate')

    # Init control and memory
    zeros_like = L.Lambda(K.zeros_like, name='zeros_like')
    memory = embedded_predq  # (?, dim)
    control = zeros_like(memory)  # (?, dim)
    # Histories of every memory / control state, used by the write unit.
    pmemories, pcontrols = [memory], [control]

    # Reasoning iterations
    outs = list()  # intermediate attention / gate tensors, in creation order
    for i in range(iterations):
        # Control Unit
        # `qi` is the only per-iteration Dense layer; the rest are shared.
        qi = L.Dense(dim, name='qi' + str(i))(embedded_predq)  # (?, dim)
        cqi = dense_cqi(concatm1([control, qi]))  # (?, dim)
        cais = dense_cais(mult_cqi([repeat_toqlen(cqi), embedded_predqs]))  # (?, qlen, 1)
        cais = squeeze2(cais)  # (?, qlen)
        cais = softmax1(cais)  # (?, qlen)
        outs.append(cais)
        new_control = dot11([cais, embedded_predqs])  # (?, dim)

        # Read Unit
        info = mult_info(
            [repeat_toctx(memory_dense(memory)), kb_dense(embedded_rules)])  # (?, rules, dim)
        infop = info_dense(concatm1([info, embedded_rules]))  # (?, rules, dim)
        rai = read_att_dense(mult_att_dense([repeat_toctx(new_control), infop]))  # (?, rules, 1)
        rai = squeeze2(rai)  # (?, rules)
        rai = softmax1(rai)  # (?, rules)
        outs.append(rai)
        read = dot11([rai, embedded_rules])  # (?, dim)

        # Write Unit
        mi_info = mem_info_dense(concatm1([read, memory]))  # (?, dim)
        past_ctrls = stack1(pcontrols)  # (?, i+1, dim)
        # Attend over all previous control states with the new control.
        sai = self_att_dense(
            mult_self_att([L.RepeatVector(i + 1)(new_control), past_ctrls]))  # (?, i+1, 1)
        sai = squeeze2(sai)  # (?, i+1)
        sai = softmax1(sai)  # (?, i+1)
        outs.append(sai)
        past_mems = stack1(pmemories)  # (?, i+1, dim)
        misa = L.dot([sai, past_mems], (1, 1), name='misa_' + str(i))  # (?, dim)
        mip = add_mip([misa_dense(misa), mi_info_dense(mi_info)])  # (?, dim)
        cip = control_gate(new_control)  # (?, 1)
        outs.append(cip)
        new_memory = gate2([mip, memory, cip])  # (?, dim)

        # Update state
        pcontrols.append(new_control)
        pmemories.append(new_memory)
        memory, control = new_memory, new_control

    # Output Unit
    out = L.Dense(1, activation='sigmoid', name='out')(concatm1([embedded_predq, memory]))
    if training:
        model = Model([context, query], out)
        model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['acc'])
    else:
        # Expose the collected intermediate tensors alongside the prediction.
        model = Model([context, query], outs + [out])
    return model
decoder_cannabis, initial_state=[encoder_last_cannabis, encoder_last_cannabis]) print('decoder_cannabis', decoder_cannabis) # For the plain Sequence-to-Sequence, we produced the output from directly from decoder # output = TimeDistributed(Dense(output_dict_size, activation="softmax"))(decoder) #2.2 Attention Mechanism #Reference: Effective Approaches to Attention-based Neural Machine Translation's Global Attention with Dot-based scoring function (Section 3, 3.1) https://arxiv.org/pdf/1508.04025.pdf from keras.layers import Activation, dot, concatenate # Equation (7) with 'dot' score from Section 3.1 in the paper. # Note that we reuse Softmax-activation layer instead of writing tensor calculation attention_cannabis = dot([decoder_cannabis, encoder_cannabis], axes=[2, 2]) attention_cannabis = Activation('softmax', name='attention')(attention_cannabis) print('attention', attention_cannabis) context_cannabis = dot([attention_cannabis, encoder_cannabis], axes=[2, 1]) print('context', context_cannabis) decoder_combined_context_cannabis = concatenate( [context_cannabis, decoder_cannabis]) print('decoder_combined_context', decoder_combined_context_cannabis) # Has another weight + tanh layer as described in equation (5) of the paper output_cannabis = TimeDistributed(Dense( 512, activation="tanh"))(decoder_combined_context_cannabis) output_cannabis = TimeDistributed(
#decoder_outputs = decoder_dense(decoder_outputs) # Equation (7) with 'dot' score from Section 3.1 in the paper. # Note that we reuse Softmax-activation layer instead of writing tensor calculation #print (decoder_lstm.shape) #print (encoder.shape) #print ('Decoder') #print (decoder_outputs) #print (decoder_hidden) #print (state_h) #attention = dot(decoder_hidden, state_h, axes=[2, 2]) #print (decoder_outputs[:,-1,:]) #print (encoder_outputs) #encoder_outputs is ?,20,256 #decoder_hidden is ?,256 attention = dot([decoder,encoder ],axes=[2,2]) attention = Activation('softmax', name='attention')(attention) print('attention', attention) #attention is ?,20 context = dot([attention, encoder], axes=[2,1]) print('context', context) decoder_combined_context = concatenate([context, decoder]) print('decoder_combined_context', decoder_combined_context) # Has another weight + tanh layer as described in equation (5) of the paper output = TimeDistributed(Dense(64, activation="tanh"))(decoder_combined_context) output = TimeDistributed(Dense(num_decoder_tokens, activation="softmax"))(output) print('output', output)