def call(self, x, mask=None):
    """Max-pool ``x`` over a pyramid of square grids and concatenate the results.

    For every entry ``n`` of ``self.pool_list`` the two spatial axes of ``x``
    are split into an ``n x n`` grid.  Each cell is cropped, reduced with
    ``K.max`` over its spatial axes, and appended to the output list, which is
    finally joined with ``K.concatenate``.

    ``self.dim_ordering`` selects the layout: ``'th'`` reads rows/cols from
    axes 2/3, ``'tf'`` reads them from axes 1/2.  ``mask`` is accepted but
    never used.
    """
    input_shape = K.shape(x)
    ordering = self.dim_ordering

    if ordering == 'th':
        num_rows = input_shape[2]
        num_cols = input_shape[3]
    elif ordering == 'tf':
        num_rows = input_shape[1]
        num_cols = input_shape[2]

    # Fractional cell size for every pyramid level.
    row_length = [K.cast(num_rows, 'float32') / i for i in self.pool_list]
    col_length = [K.cast(num_cols, 'float32') / i for i in self.pool_list]

    outputs = []
    for level, n_regions in enumerate(self.pool_list):
        cell_w = col_length[level]
        cell_h = row_length[level]
        for ix in range(n_regions):
            for jy in range(n_regions):
                # Cell bounds are rounded to the nearest integer index.
                x1 = K.cast(K.round(ix * cell_w), 'int32')
                x2 = K.cast(K.round(ix * cell_w + cell_w), 'int32')
                y1 = K.cast(K.round(jy * cell_h), 'int32')
                y2 = K.cast(K.round(jy * cell_h + cell_h), 'int32')

                if ordering == 'th':
                    new_shape = [input_shape[0], input_shape[1],
                                 y2 - y1, x2 - x1]
                    x_crop = x[:, :, y1:y2, x1:x2]
                    xm = K.reshape(x_crop, new_shape)
                    outputs.append(K.max(xm, axis=(2, 3)))
                elif ordering == 'tf':
                    new_shape = [input_shape[0], y2 - y1, x2 - x1,
                                 input_shape[3]]
                    x_crop = x[:, y1:y2, x1:x2, :]
                    xm = K.reshape(x_crop, new_shape)
                    outputs.append(K.max(xm, axis=(1, 2)))

    return K.concatenate(outputs)
def build(self):
    """Build the question encoder and the question-conditioned answer encoder.

    Both encoders share a single embedding layer and a single ``Dropout(0.5)``
    layer.  Each side runs two recurrent layers over the embedded sequence,
    concatenates their outputs on the last axis, applies dropout, max-pools
    over axis 1 and applies ``tanh``.  The answer side uses ``AttentionLSTM``
    layers that are given the question encoding.

    Returns:
        tuple: ``(question_model, answer_model)``.  The first maps
        ``[question]`` to the question encoding; the second maps
        ``[question, answer]`` to the answer encoding.
    """
    question, answer = self._get_inputs()

    # One embedding table is shared by questions and answers.  (A second,
    # never-applied embedding layer used to be created here and had
    # ``set_weights`` called on it while still unbuilt; it contributed
    # nothing to either model and has been removed.)
    embedding = Embedding(self.config['n_words'],
                          self.model_params.get('n_embed_dims', 141))
    question_embedding = embedding(question)
    answer_embedding = embedding(answer)

    # dropout (the same layer instance is reused everywhere below)
    dropout = Dropout(0.5)
    question_dropout = dropout(question_embedding)
    answer_dropout = dropout(answer_embedding)

    # question rnn
    # NOTE(review): neither LSTM sets go_backwards, so "backward" reads the
    # sequence in the same direction as "forward" -- confirm this is intended.
    forward_lstm = LSTM(self.config.get('n_lstm_dims', 141),
                        consume_less='mem', return_sequences=True)
    backward_lstm = LSTM(self.config.get('n_lstm_dims', 141),
                         consume_less='mem', return_sequences=True)
    question_lstm = merge([forward_lstm(question_dropout),
                           backward_lstm(question_dropout)],
                          mode='concat', concat_axis=-1)
    question_dropout = dropout(question_lstm)

    # max over axis 1, then tanh
    maxpool = Lambda(lambda x: K.max(x, axis=1, keepdims=False),
                     output_shape=lambda x: (x[0], x[2]))
    question_pool = maxpool(question_dropout)
    activation = Activation('tanh')
    question_output = activation(question_pool)
    question_model = Model(input=[question], output=[question_output])

    # attentional answer rnn, conditioned on the question encoding
    forward_lstm = AttentionLSTM(self.config.get('n_lstm_dims', 141),
                                 question_output, consume_less='mem',
                                 return_sequences=True)
    backward_lstm = AttentionLSTM(self.config.get('n_lstm_dims', 141),
                                  question_output, consume_less='mem',
                                  return_sequences=True)
    answer_lstm = merge([forward_lstm(answer_dropout),
                         backward_lstm(answer_dropout)],
                        mode='concat', concat_axis=-1)
    answer_dropout = dropout(answer_lstm)

    # max over axis 1, then tanh
    maxpool = Lambda(lambda x: K.max(x, axis=1, keepdims=False),
                     output_shape=lambda x: (x[0], x[2]))
    answer_pool = maxpool(answer_dropout)
    activation = Activation('tanh')
    answer_output = activation(answer_pool)
    answer_model = Model(input=[question, answer], output=[answer_output])

    return question_model, answer_model
def train_cnn(x, y, validation_data, vocab_size, nb_ans, num_iter, batch_size=1, on_memory=True,
              early_stopping=-1, wemb_dim=200, wemb_init='glorot_uniform', wemb_dropout=0.2,
              cnn_nbfilter=[300, 200, 100], cnn_init='orthogonal', cnn_act='relu',
              cnn_border='valid', optimizer='adamax'):
    """Train a uni/bi/tri-gram CNN classifier and return the best checkpoint.

    The model embeds the integer input, runs three ``Convolution1D`` layers
    with filter lengths 1, 2 and 3 over the embedding, max-pools each over
    axis 1, concatenates the pooled features and feeds them to a softmax
    ``Dense`` layer with ``nb_ans`` units.

    Args:
        x, y: Training inputs and targets; ``x.shape[1]`` sets the input width.
        validation_data: Indexable pair ``(val_x, val_y)`` used for
            validation during ``fit`` and for the final ``evaluate``.
        vocab_size: ``input_dim`` of the embedding.
        nb_ans: Number of output classes.
        num_iter: Number of epochs passed to ``fit``.
        batch_size: Batch size for ``fit`` and ``evaluate``.
        on_memory: If true shuffle normally, otherwise use ``shuffle='batch'``.
        early_stopping: Patience for ``EarlyStopping`` on ``val_loss``;
            a negative value disables early stopping.
        wemb_dim, wemb_init, wemb_dropout: Embedding configuration.
        cnn_nbfilter: Filter counts for the 1-, 2- and 3-gram convolutions
            (read only, never mutated).
        cnn_init, cnn_act, cnn_border: Convolution configuration.
        optimizer: Optimizer passed to ``compile``.

    Returns:
        tuple: ``(model, loss, acc)`` where the model carries the weights of
        the best checkpoint and ``loss``/``acc`` come from evaluating it on
        ``validation_data``.
    """
    input_x = Input(shape=(x.shape[1],), dtype="int32")
    word_embedding = Embedding(input_dim=vocab_size, output_dim=wemb_dim, init=wemb_init,
                               mask_zero=False, dropout=wemb_dropout)(input_x)

    # One convolution + max-over-time branch per n-gram width.
    pooled_branches = []
    for width in (1, 2, 3):
        ngram_cnn = Convolution1D(nb_filter=cnn_nbfilter[width - 1], filter_length=width,
                                  init=cnn_init, activation=cnn_act,
                                  border_mode=cnn_border)(word_embedding)
        pooled_branches.append(
            Lambda(function=lambda t: K.max(t, axis=1),
                   output_shape=lambda shape: (shape[0],) + shape[2:])(ngram_cnn))

    merge_three_cnn = merge(pooled_branches, mode='concat', concat_axis=1)
    predictions = Dense(nb_ans, activation='softmax')(merge_three_cnn)

    model = Model(input=input_x, output=predictions)
    model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])

    callbacks = [EarlyStopping(monitor='val_loss', patience=early_stopping)] if early_stopping >= 0 else []
    shuffle = True if on_memory else 'batch'

    # The checkpoint lives in a temp file; the context manager guarantees it
    # is removed even when training or evaluation raises.
    with tempfile.NamedTemporaryFile() as temp_model:
        callbacks.append(ModelCheckpoint(filepath=temp_model.name, save_best_only=True))
        callbacks.append(FlushStdout())
        model.fit(x, y, batch_size=batch_size, nb_epoch=num_iter,
                  validation_data=validation_data, shuffle=shuffle,
                  callbacks=callbacks, verbose=2)
        sys.stdout.flush()
        # Restore the best weights seen during training before scoring.
        model.load_weights(temp_model.name)
        loss, acc = model.evaluate(validation_data[0], validation_data[1], batch_size=batch_size)

    return model, loss, acc
def __init__(self):
    """Assemble the ranking model (``self.model``) and query encoder (``self.encoder``).

    The model takes one query, one positive document and ``J`` negative
    documents, encodes each with convolution -> max over axis 1 -> dense,
    scores every document against the query with a normalized dot product,
    scales the scores with a learned 1x1 kernel and applies a softmax.
    """
    # Inputs for the query, the positive (clicked) document and the negative
    # (unclicked) documents.  The first dimension is None because queries and
    # documents can vary in length.
    query = Input(shape=(None, WORD_DEPTH))
    pos_doc = Input(shape=(None, WORD_DEPTH))
    neg_docs = [Input(shape=(None, WORD_DEPTH)) for j in range(J)]

    # Query tower.
    query_conv = Convolution1D(K, FILTER_LENGTH, padding="same",
                               input_shape=(None, WORD_DEPTH),
                               activation="tanh")(query)  # See equation (2).
    query_max = Lambda(lambda x: backend.max(x, axis=1),
                       output_shape=(K, ))(query_conv)  # See section 3.4.
    query_sem = Dense(L, activation="tanh",
                      input_dim=K)(query_max)  # See section 3.5.

    # Document tower: one set of layers shared by every document.
    doc_conv = Convolution1D(K, FILTER_LENGTH, padding="same",
                             input_shape=(None, WORD_DEPTH),
                             activation="tanh")
    doc_max = Lambda(lambda x: backend.max(x, axis=1), output_shape=(K, ))
    doc_sem = Dense(L, activation="tanh", input_dim=K)

    pos_doc_conv = doc_conv(pos_doc)
    neg_doc_convs = [doc_conv(doc) for doc in neg_docs]

    pos_doc_max = doc_max(pos_doc_conv)
    neg_doc_maxes = [doc_max(conv_out) for conv_out in neg_doc_convs]

    pos_doc_sem = doc_sem(pos_doc_max)
    neg_doc_sems = [doc_sem(pooled) for pooled in neg_doc_maxes]

    # Cosine similarity between the query and each document representation.
    # See equation (4).
    R_Q_D_p = dot([query_sem, pos_doc_sem], axes=1, normalize=True)
    R_Q_D_ns = [dot([query_sem, sem], axes=1, normalize=True)
                for sem in neg_doc_sems]

    similarities = [R_Q_D_p]
    similarities = similarities + R_Q_D_ns
    concat_Rs = concatenate(similarities)
    concat_Rs = Reshape((J + 1, 1))(concat_Rs)

    # Each R(Q, D) value is multiplied by gamma.  In the paper gamma is a
    # smoothing factor for the softmax, set empirically on held-out data.
    # Here it is learned by treating it as a single 1 x 1 kernel.
    weight = np.array([1]).reshape(1, 1, 1)
    gamma_layer = Convolution1D(1, 1, padding="same", input_shape=(J + 1, 1),
                                activation="linear", use_bias=False,
                                weights=[weight])
    with_gamma = gamma_layer(concat_Rs)  # See equation (5).
    with_gamma = Reshape((J + 1, ))(with_gamma)

    # Softmax over the scaled similarities gives P(D+|Q).  See equation (5).
    prob = Activation("softmax")(with_gamma)

    self.model = Model(inputs=[query, pos_doc] + neg_docs, outputs=prob)
    self.model.compile(optimizer="adadelta", loss="categorical_crossentropy")

    self.encoder = Model(inputs=query, outputs=query_sem)
def _batch_hard_triplet_loss(self, y_true: Tensor, pairwise_dist: Tensor) -> Tensor:
    """Return the mean batch-hard triplet loss for a pairwise distance matrix.

    For each row the largest positive-masked distance and the smallest
    distance among the (semi-hard filtered) negatives are selected; the loss
    is ``max(pos - neg + self.margin, 0)`` averaged over all rows.
    """
    # Hardest positive: largest distance among valid anchor/positive pairs.
    positive_mask = self._get_anchor_positive_triplet_mask(y_true, pairwise_dist)
    hardest_positive = K.max(positive_mask * pairwise_dist, axis=1, keepdims=True)

    # Negatives: start from the valid anchor/negative pairs, then narrow the
    # mask with the semi-hard helper.
    negative_mask = self._get_anchor_negative_triplet_mask(y_true, pairwise_dist)
    negative_dist = negative_mask * pairwise_dist
    negative_mask = self._get_semihard_anchor_negative_triplet_mask(
        negative_dist, hardest_positive, negative_mask)

    # Push excluded entries up by the row maximum so the row-wise minimum
    # only picks from entries that are still in the mask.
    row_max = K.max(pairwise_dist, axis=1, keepdims=True)
    shifted = pairwise_dist + row_max * (1.0 - negative_mask)
    hardest_negative = K.min(shifted, axis=1, keepdims=True)

    per_anchor = K.clip(hardest_positive - hardest_negative + self.margin, 0.0, None)
    return K.mean(per_anchor)
def duel_atari_cnn(input_shape, n_actions, mode='mean'):
    """Build a dueling-architecture Atari CNN.

    Follows the network described in the 2015 DeepMind Nature paper with the
    changes proposed in the Dueling Network paper.  The final dense layer
    emits ``n_actions + 1`` values: column 0 is the state value V(s) and the
    remaining columns are the advantages A(s, a).  They are combined as
    ``Q = V + A - reduce_a(A)`` where ``reduce`` is the mean or the max.

    Args:
        input_shape: 3D shape in (channels, height, width) format.
        n_actions: int, number of actions.
        mode: ``'mean'`` or ``'max'``; the advantage baseline to subtract.

    Returns:
        A Keras ``Model`` mapping the input to ``n_actions`` Q-values.

    Raises:
        ValueError: If ``mode`` is neither ``'mean'`` nor ``'max'``.
    """
    # The baseline is reduced over the action axis (axis=1) so that every
    # sample gets its own baseline.  Without an explicit axis the reduction
    # runs over the whole batch and couples the Q-values of unrelated states.
    if mode == 'mean':
        agg = Lambda(lambda a: K.expand_dims(a[:, 0], dim=-1) + a[:, 1:]
                     - K.mean(a[:, 1:], axis=1, keepdims=True),
                     output_shape=(n_actions,))
    elif mode == 'max':
        agg = Lambda(lambda a: K.expand_dims(a[:, 0], dim=-1) + a[:, 1:]
                     - K.max(a[:, 1:], axis=1, keepdims=True),
                     output_shape=(n_actions,))
    else:
        raise ValueError("mode must be either 'mean' or 'max'")

    state_input = Input(shape=input_shape)
    x = Convolution2D(32, 8, 8, subsample=(4, 4), activation='relu')(state_input)
    x = Convolution2D(64, 4, 4, subsample=(2, 2), activation='relu')(x)
    x = Convolution2D(64, 3, 3, subsample=(1, 1), activation='relu')(x)
    x = Flatten()(x)
    x = Dense(512, activation='relu')(x)
    # One extra unit for the state value V(s).
    x = Dense(n_actions + 1)(x)
    output = agg(x)
    return Model(state_input, output)
def softmax(x):
    """Apply softmax over the last axis of a 2D, 3D or 4D tensor.

    2D input is delegated to ``K.softmax``.  For 3D and 4D input the softmax
    is computed explicitly, subtracting the per-slice maximum before
    exponentiating for numerical stability.

    Raises:
        ValueError: If ``x`` has a rank other than 2, 3 or 4.
    """
    ndim = K.ndim(x)
    if ndim == 2:
        return K.softmax(x)
    if ndim in (3, 4):
        e = K.exp(x - K.max(x, axis=-1, keepdims=True))
        s = K.sum(e, axis=-1, keepdims=True)
        return e / s
    raise ValueError('Cannot apply softmax to a tensor '
                     'that is not 2D, 3D or 4D. '
                     'Here, ndim=' + str(ndim))
def build(self):
    """Encode subject, relation and object with one shared embedding pipeline.

    Each input goes through the same embedding (``mask_zero=True``), the same
    ``Dropout(0.5)``, a max over axis 1 and a ``tanh`` activation.

    Returns:
        tuple: ``(subject_output, relation_output, object_output)``.
    """
    inputs = [self.get_subject(), self.relation, self.object_good]

    # Optional pre-trained embedding matrix; Keras expects a list of arrays.
    weights = self.model_params.get('initial_embed_weights', None)
    if weights is not None:
        weights = [weights]
    embedding = Embedding(input_dim=self.config['n_words'],
                          output_dim=self.model_params.get('n_embed_dims', 100),
                          weights=weights,
                          mask_zero=True)
    embedded = [embedding(tensor) for tensor in inputs]

    # dropout
    dropout = Dropout(0.5)
    dropped = [dropout(tensor) for tensor in embedded]

    # max over axis 1
    maxpool = Lambda(lambda x: K.max(x, axis=1, keepdims=False),
                     output_shape=lambda x: (x[0], x[2]))
    pooled = [maxpool(tensor) for tensor in dropped]

    # activation
    activation = Activation('tanh')
    subject_output, relation_output, object_output = [
        activation(tensor) for tensor in pooled]

    return subject_output, relation_output, object_output
def linkFeature(self, input_name, conv_name, activation='tanh'):
    """Apply each convolution in ``self.layers[conv_name]`` to one input tensor.

    Every convolution output is passed through an activation and reduced with
    a max over axis 1.  ``activation`` is either an activation name (string)
    or a callable that builds an activation layer when given ``name=``.

    Returns:
        The single pooled feature if there is only one convolution,
        otherwise the concatenation of all pooled features.
    """
    filters = self.params.get('filters')
    nb_filter = self.params.get('nb_filter')
    convs = self.layers.get(conv_name)
    assert filters
    assert convs

    source = self.tensors[input_name]
    features = []
    for fsz, conv in zip(filters, convs):
        convolved = conv(source)
        if type(activation) is type(''):
            act_layer = Activation(
                activation,
                name='%s-act-%d' % (input_name, fsz))
        else:
            # Advanced activations are passed in as layer factories.
            act_layer = activation(
                name='%s-advanced-act-%d' % (input_name, fsz))
        activated = act_layer(convolved)
        pooled = Lambda(
            lambda x: K.max(x, axis=1),
            output_shape=(nb_filter,),
            name='%s-maxpool-%d' % (input_name, fsz)
        )(activated)
        features.append(pooled)

    if len(features) > 1:
        return Merge(mode='concat', name='%s-feature' % input_name)(features)
    return features[0]
def binary_crossentropy_with_ranking(y_true, y_pred):
    """Binary cross-entropy plus a squared ranking penalty.

    The ranking term converts predictions to logit scores, finds the largest
    score among entries with ``y_true < 1`` and penalizes (squared) every
    positive entry whose score falls below that maximum.
    """
    eps = K.epsilon()

    # Ordinary log loss.
    # NOTE(review): argument order of K.binary_crossentropy differs between
    # Keras versions -- confirm it matches the version this file targets.
    log_loss = K.mean(K.binary_crossentropy(y_pred, y_true), axis=-1)

    # Clip probabilities for stability, then map them back to raw scores.
    clipped = K.clip(y_pred, eps, 1 - eps)
    scores = K.log(clipped / (1 - clipped))

    # Largest score among the zero-outcome entries (positives contribute 0).
    top_negative_score = K.max(scores * (y_true < 1))

    # Margin of each score relative to that maximum, kept for positives only.
    margin = (scores - top_negative_score) * y_true

    # Only scores below the maximum are penalized.
    penalty = K.square(K.clip(margin, -100, 0))

    # Average over the positive outcomes (+1 avoids division by zero).
    rank_loss = K.sum(penalty, axis=-1) / (K.sum(y_true > 0) + 1)

    # (rank_loss + 1) * log_loss is an alternative to try.
    return rank_loss + log_loss
def yolo_filter_boxes(box_confidence, boxes, box_class_probs, threshold = .6):
    """Keep only the YOLO boxes whose best class score reaches ``threshold``.

    Arguments:
    box_confidence -- tensor of shape (19, 19, 5, 1)
    boxes -- tensor of shape (19, 19, 5, 4)
    box_class_probs -- tensor of shape (19, 19, 5, 80)
    threshold -- real value; a box is dropped when its highest class score is below it

    Returns:
    scores -- tensor of shape (None,), the class score of each selected box
    boxes -- tensor of shape (None, 4), the (b_x, b_y, b_h, b_w) coordinates of the selected boxes
    classes -- tensor of shape (None,), the class index detected by each selected box

    Note: "None" appears because the number of selected boxes depends on the
    threshold. For example, scores has shape (10,) if 10 boxes are kept.
    """
    # Per-class score = objectness * class probability
    # ([19, 19, 5, 1] * [19, 19, 5, 80] broadcasts to [19, 19, 5, 80]).
    per_class_scores = box_confidence * box_class_probs

    # Best class per box and the score that goes with it.
    best_class = K.argmax(per_class_scores, axis=-1)
    best_score = K.max(per_class_scores, axis = -1, keepdims = False)

    # Boolean mask, same shape as best_score, True for boxes to keep.
    keep = best_score >= threshold

    # Apply the mask to scores, boxes and classes.
    selected_scores = tf.boolean_mask(best_score, keep)
    selected_boxes = tf.boolean_mask(boxes, keep)
    selected_classes = tf.boolean_mask(best_class, keep)

    return selected_scores, selected_boxes, selected_classes
def set_batch_function(self, model, input_shape, batch_size, nb_actions, gamma):
    """Compile ``self.batch_function``, mapping packed transitions to training pairs.

    Each row of the ``samples`` placeholder packs one transition as
    ``[S (input_dim) | a | r | S' (input_dim) | game_over]``.  The compiled
    function returns ``[S, targets]`` where ``targets`` equals the model's
    current prediction for ``S`` except at the taken action, which is
    replaced by ``r + gamma * (1 - game_over) * max_a' Q(S', a')``.

    Args:
        model: Callable Keras model producing ``nb_actions`` values per row.
        input_shape: Tuple shape of a single state.
        batch_size: Number of transitions per batch (fixed in the graph).
        nb_actions: Number of discrete actions.
        gamma: Discount factor.
    """
    input_dim = np.prod(input_shape)
    samples = K.placeholder(shape=(batch_size, input_dim * 2 + 3))

    # Unpack the columns of the packed transition matrix.
    S = samples[:, 0:input_dim]
    a = samples[:, input_dim]
    # Actions are indices; the dtype used to be an empty string, which is
    # not a valid dtype for K.cast.
    a = K.cast(a, 'int32')
    r = samples[:, input_dim + 1]
    S_prime = samples[:, input_dim + 2:2 * input_dim + 2]
    game_over = samples[:, 2 * input_dim + 2:2 * input_dim + 3]

    # Broadcast reward and terminal flag to (batch_size, nb_actions).
    r = K.reshape(r, (batch_size, 1))
    r = K.repeat(r, nb_actions)
    r = K.reshape(r, (batch_size, nb_actions))
    game_over = K.repeat(game_over, nb_actions)
    game_over = K.reshape(game_over, (batch_size, nb_actions))

    # Run S and S' through the model in a single pass.
    S = K.reshape(S, (batch_size, ) + input_shape)
    S_prime = K.reshape(S_prime, (batch_size, ) + input_shape)
    X = K.concatenate([S, S_prime], axis=0)
    Y = model(X)

    # Best next-state value, broadcast to (batch_size, nb_actions).
    Qsa = K.max(Y[batch_size:], axis=1)
    Qsa = K.reshape(Qsa, (batch_size, 1))
    Qsa = K.repeat(Qsa, nb_actions)
    Qsa = K.reshape(Qsa, (batch_size, nb_actions))

    # delta selects the taken action; only that entry gets the new target.
    delta = K.reshape(self.one_hot(a, nb_actions), (batch_size, nb_actions))
    targets = (1 - delta) * Y[:batch_size] + delta * (r + gamma * (1 - game_over) * Qsa)
    self.batch_function = K.function(inputs=[samples], outputs=[S, targets])
def create_neural_network(self):
    """Build, compile and return a dueling Q-network; also dump it to JSON.

    A plain three-layer network is built first, then its last layer is
    replaced by a dueling head: a dense layer with ``nb_action + 1`` outputs
    (column 0 is V(s), the rest are A(s, a)) followed by the "max"
    aggregation ``Q = V + (A - max_a A)``.

    Side effects:
        Writes the model architecture to ``cartpole.json`` in the current
        working directory.

    Returns:
        The compiled Keras ``Model`` (MSE loss, Adam with ``lr=self.alpha``).
    """
    model = Sequential()
    model.add(Dense(100, input_dim=self.nstates, activation='relu'))
    model.add(Dense(100, activation='relu'))
    model.add(Dense(self.nactions, activation='linear'))

    # Take the second-to-last layer and abandon the last one.
    layer = model.layers[-2]
    nb_action = model.output._keras_shape[-1]

    # y has nb_action + 1 columns:
    #   y[:, 0]  represents V(s; theta)
    #   y[:, 1:] represents A(s, a; theta)
    y = Dense(nb_action + 1, activation='linear')(layer.output)

    # Dueling aggregation, "max" variant:
    #   Q(s, a) = V(s) + (A(s, a) - max_a A(s, a))
    # The max is taken over the action axis (axis=1) so each sample gets its
    # own baseline; without an axis it would be reduced over the whole batch.
    # Alternatives: replace K.max with K.mean for the "average" variant, or
    # drop the subtraction entirely for the naive variant.
    outputlayer = Lambda(
        lambda a: K.expand_dims(a[:, 0], -1) + a[:, 1:]
        - K.max(a[:, 1:], axis=1, keepdims=True),
        output_shape=(nb_action,))(y)

    # Connect the dueling head to the original input.
    model = Model(input=model.input, output=outputlayer)
    model.compile(loss='mse', optimizer=Adam(lr=self.alpha))

    model_json = model.to_json()
    with open('cartpole.json', 'w') as json_file:
        json_file.write(model_json)
    return model
def build_model(self, p):
    """Build the online/target Q-networks and compile the training functions.

    Sets ``self.Q_model``, ``self.Q_old_model`` (initialized with the online
    weights), ``self.training_func`` (one optimizer step on the summed squared
    TD error, returning the loss) and ``self.Q_func`` (state -> Q-values).

    ``p`` is a parameter dict; the keys read here are ``input_shape``,
    ``discount``, ``num_actions`` and ``learning_rate``.
    """
    state = Input(p['input_shape'], name='input_state')
    action = Input((1,), name='input_action', dtype='int32')
    reward = Input((1,), name='input_reward')
    terminal = Input((1,), name='input_terminate', dtype='int32')
    next_state = Input(p['input_shape'], name='input_next_sate')

    self.Q_model = self.build_cnn_model(p)
    self.Q_old_model = self.build_cnn_model(p, False)  # Q hat in paper
    self.Q_old_model.set_weights(self.Q_model.get_weights())  # Q' = Q

    q_values = self.Q_model(state)  # batch * actions
    # Disconnecting the gradient is not strictly necessary.
    q_next = disconnected_grad(self.Q_old_model(next_state))

    # TD target, batch * 1.
    best_next = K.max(q_next, axis=1, keepdims=True)
    target = reward + p['discount'] * (1 - terminal) * best_next

    # Select Q(s, a) for the action that was actually taken.
    action_ids = Tht.arange(p['num_actions']).reshape((1, -1))
    action_mask = K.equal(action_ids, action.reshape((-1, 1)))
    chosen_q = K.sum(q_values * action_mask, axis=1).reshape((-1, 1))

    loss = K.sum((chosen_q - target) ** 2)  # sum could also be mean()

    optimizer = adam(p['learning_rate'])
    params = self.Q_model.trainable_weights
    update = optimizer.get_updates(params, [], loss)

    self.training_func = K.function(
        [state, action, reward, terminal, next_state], loss, updates=update)
    self.Q_func = K.function([state], q_values)
def _loss_tensor(y_true, y_pred):
    """Squared hinge loss applied only at the temporal maximum of ``y_pred``.

    For every position that equals the maximum of ``y_pred`` along axis -2
    the prediction is kept; everywhere else the prediction is replaced by
    ``y_true`` so that those positions contribute no error to the hinge.
    """
    # Maximum along the temporal axis, repeated back to the temporal length.
    max_val = K.max(y_pred, axis=-2)
    max_val = K.repeat(max_val, K.shape(y_pred)[-2])
    # (A debug ``print(K.eval(max_val))`` used to sit here; evaluating a
    # symbolic tensor inside a loss fails as soon as it depends on a
    # placeholder, so it has been removed.)
    mask = K.cast(K.equal(max_val, y_pred), K.floatx())
    y_pred = mask * y_pred + (1 - mask) * y_true
    return squared_hinge(y_true, y_pred)
def convolution_softmax(volume):
    """Softmax along axis 1 of a tensor of arbitrary rank.

    Like ``K.softmax`` but normalizes over axis 1 only, so it can be applied
    to convolutional volumes.
    """
    # Subtracting the per-slice maximum keeps exp() numerically stable; see
    # the T.nnet docs for details.
    shifted = volume - K.max(volume, axis=1, keepdims=True)
    exps = K.exp(shifted)
    normalizer = K.sum(exps, axis=1, keepdims=True)
    return exps / normalizer
def yolo_filter_boxes(box_confidence, boxes, box_class_probs, threshold = .6):
    """Filter YOLO boxes by thresholding the combined object/class confidence.

    Arguments:
    box_confidence -- tensor of shape (19, 19, 5, 1)
    boxes -- tensor of shape (19, 19, 5, 4)
    box_class_probs -- tensor of shape (19, 19, 5, 80)
    threshold -- real value; boxes whose highest class score is below it are discarded

    Returns:
    scores -- tensor of shape (None,), the class score of each selected box
    boxes -- tensor of shape (None, 4), the (b_x, b_y, b_h, b_w) coordinates of the selected boxes
    classes -- tensor of shape (None,), the class index detected by each selected box
    """
    # Step 1: score of every class for every box.
    class_scores = box_confidence*box_class_probs

    # Step 2: winning class per box and its score.
    winner_index = K.argmax(class_scores, axis=-1)
    winner_score = K.max(class_scores, axis=-1)

    # Step 3: mask shaped like winner_score, True where the box is kept.
    is_confident = winner_score >= threshold

    # Step 4: apply the mask to scores, boxes and classes.
    kept = [tf.boolean_mask(tensor, is_confident)
            for tensor in (winner_score, boxes, winner_index)]
    scores, boxes, classes = kept

    return scores, boxes, classes
def build(self):
    """Build the shared dense + multi-width CNN encoder for question and answer.

    Both inputs pass through the same layers: embedding, dropout,
    time-distributed dense, activity regularization, dropout, four parallel
    convolutions (filter lengths 2, 3, 5, 7) concatenated, activity
    regularization, dropout, max over axis 1 and ``tanh``.

    Returns:
        tuple: ``(question_output, answer_output)``.
    """
    assert self.config['question_len'] == self.config['answer_len']

    question = self.question
    answer = self.get_answer()

    # add embedding layers
    embedding = Embedding(self.config['n_words'],
                          self.model_params.get('n_embed_dims', 100))
    q_seq = embedding(question)
    a_seq = embedding(answer)

    # turn off layer updating
    embedding.params = []
    embedding.updates = []

    # dropout (one layer instance, reused at every stage below)
    dropout = Dropout(0.25)
    q_seq = dropout(q_seq)
    a_seq = dropout(a_seq)

    # dense
    dense = TimeDistributed(Dense(self.model_params.get('n_hidden', 200),
                                  activation='tanh'))
    q_seq = dense(q_seq)
    a_seq = dense(a_seq)

    # regularization
    q_seq = ActivityRegularization(l2=0.0001)(q_seq)
    a_seq = ActivityRegularization(l2=0.0001)(a_seq)

    # dropout
    q_seq = dropout(q_seq)
    a_seq = dropout(a_seq)

    # cnn
    nb_filters = self.model_params.get('nb_filters', 1000)
    conv_activation = self.model_params.get('conv_activation', 'relu')
    cnns = []
    for filter_length in [2, 3, 5, 7]:
        cnns.append(Convolution1D(filter_length=filter_length,
                                  nb_filter=nb_filters,
                                  activation=conv_activation,
                                  border_mode='same'))
    q_seq = merge([cnn(q_seq) for cnn in cnns], mode='concat')
    a_seq = merge([cnn(a_seq) for cnn in cnns], mode='concat')

    # regularization
    q_seq = ActivityRegularization(l2=0.0001)(q_seq)
    a_seq = ActivityRegularization(l2=0.0001)(a_seq)

    # dropout
    q_seq = dropout(q_seq)
    a_seq = dropout(a_seq)

    # maxpooling
    maxpool = Lambda(lambda x: K.max(x, axis=1, keepdims=False),
                     output_shape=lambda x: (x[0], x[2]))
    question_pool = maxpool(q_seq)
    answer_pool = maxpool(a_seq)

    # activation
    activation = Activation('tanh')
    question_output = activation(question_pool)
    answer_output = activation(answer_pool)

    return question_output, answer_output
def build(self):
    """Encode question and answer with pre-trained embeddings and a shared CNN.

    Embedding weights are loaded from the file named by
    ``self.config['initial_embed_weights']``.  Four convolutions (filter
    lengths 2, 3, 5, 7; 500 filters each) are concatenated, max-pooled over
    axis 1 and projected by a shared ``Dense(100, tanh)`` layer.

    Returns:
        tuple: ``(question_pool, answer_pool)``.
    """
    assert self.config['question_len'] == self.config['answer_len']

    question = self.question
    answer = self.get_answer()

    # Embedding initialised from the saved matrix; its width sets output_dim.
    weights = np.load(self.config['initial_embed_weights'])
    embedding = Embedding(input_dim=self.config['n_words'],
                          output_dim=weights.shape[1],
                          weights=[weights])
    question_embedding = embedding(question)
    answer_embedding = embedding(answer)

    # cnn
    cnns = []
    for filter_length in [2, 3, 5, 7]:
        cnns.append(Convolution1D(filter_length=filter_length,
                                  nb_filter=500,
                                  activation='tanh',
                                  border_mode='same'))
    question_cnn = merge([cnn(question_embedding) for cnn in cnns], mode='concat')
    answer_cnn = merge([cnn(answer_embedding) for cnn in cnns], mode='concat')

    # maxpooling
    maxpool = Lambda(lambda x: K.max(x, axis=1, keepdims=False),
                     output_shape=lambda x: (x[0], x[2]))
    maxpool.supports_masking = True

    # Shared projection applied to both pooled encodings.
    enc = Dense(100, activation='tanh')
    question_pool = enc(maxpool(question_cnn))
    answer_pool = enc(maxpool(answer_cnn))

    return question_pool, answer_pool
def compile_saliency_function(model, activation_layer='block5_conv3'):
    """Compile a function returning the input gradient of a layer's peak activation.

    The output of the layer named ``activation_layer`` is reduced with a max
    over axis 3, summed, and differentiated with respect to the model input.

    Returns:
        A backend function taking ``[input_img, learning_phase]`` and
        returning ``[saliency]``.
    """
    input_img = model.input
    # The first layer is skipped when building the name lookup.
    layers_by_name = {layer.name: layer for layer in model.layers[1:]}
    activations = layers_by_name[activation_layer].output
    peak = K.max(activations, axis=3)
    gradients = K.gradients(K.sum(peak), input_img)
    saliency = gradients[0]
    return K.function([input_img, K.learning_phase()], [saliency])
def build(self):
    """Encode question and answer as max-pooled, ``tanh``-squashed embeddings.

    Both inputs share one embedding (``mask_zero=True``, optionally
    initialised from ``model_params['initial_embed_weights']``), one
    ``Dropout(0.5)``, one max over axis 1 and one ``tanh`` activation.

    Returns:
        tuple: ``(question_output, answer_output)``.
    """
    question = self.question
    answer = self.get_answer()

    # Optional pre-trained matrix; Keras expects it wrapped in a list.
    weights = self.model_params.get('initial_embed_weights', None)
    if weights is not None:
        weights = [weights]

    embedding = Embedding(input_dim=self.config['n_words'],
                          output_dim=self.model_params.get('n_embed_dims', 100),
                          weights=weights,
                          mask_zero=True)
    q_embedded = embedding(question)
    a_embedded = embedding(answer)

    # dropout
    dropout = Dropout(0.5)
    q_dropped = dropout(q_embedded)
    a_dropped = dropout(a_embedded)

    # maxpooling
    maxpool = Lambda(lambda x: K.max(x, axis=1, keepdims=False),
                     output_shape=lambda x: (x[0], x[2]))
    q_pooled = maxpool(q_dropped)
    a_pooled = maxpool(a_dropped)

    # activation
    activation = Activation('tanh')
    question_output = activation(q_pooled)
    answer_output = activation(a_pooled)

    return question_output, answer_output
def build(self):
    """Build a bidirectional LSTM question encoder and an attentive answer encoder.

    The question is encoded by a forward and a backward LSTM (141 units
    each), each max-pooled over axis 1 and concatenated.  The same two LSTMs
    are then wrapped in ``AttentionLSTMWrapper`` with the question encoding
    and applied to the answer.

    Returns:
        tuple: ``(question_pool, answer_pool)``.
    """
    question = self.question
    answer = self.get_answer()

    # Embedding initialised from the saved matrix (mask_zero is left off).
    weights = np.load(self.config['initial_embed_weights'])
    embedding = Embedding(input_dim=self.config['n_words'],
                          output_dim=weights.shape[1],
                          weights=[weights])
    question_embedding = embedding(question)
    answer_embedding = embedding(answer)

    # question rnn part
    fwd_lstm = LSTM(141, return_sequences=True, consume_less='mem')
    bwd_lstm = LSTM(141, return_sequences=True, consume_less='mem',
                    go_backwards=True)
    question_fwd = fwd_lstm(question_embedding)
    question_bwd = bwd_lstm(question_embedding)

    # maxpooling
    maxpool = Lambda(lambda x: K.max(x, axis=1, keepdims=False),
                     output_shape=lambda x: (x[0], x[2]))
    maxpool.supports_masking = True
    question_pool = merge([maxpool(question_fwd), maxpool(question_bwd)],
                          mode='concat', concat_axis=-1)

    # answer rnn part: the question LSTMs wrapped with attention
    from attention_lstm import AttentionLSTMWrapper
    attn_fwd = AttentionLSTMWrapper(fwd_lstm, question_pool,
                                    single_attention_param=True)
    attn_bwd = AttentionLSTMWrapper(bwd_lstm, question_pool,
                                    single_attention_param=True)
    answer_fwd = attn_fwd(answer_embedding)
    answer_bwd = attn_bwd(answer_embedding)
    answer_pool = merge([maxpool(answer_fwd), maxpool(answer_bwd)],
                        mode='concat', concat_axis=-1)

    return question_pool, answer_pool
def build(i):
    """Encode the input tensor ``i``: embedding -> Conv1D -> max over axis 1 -> tanh.

    The convolution has no activation of its own and constrains both its
    weights and its bias with ``maxnorm(3)``.
    """
    # embedding
    embedding = Embedding(len(vocab), WORD_EMBEDDING_DIM)
    embedded = embedding(i)

    # convolution (linear; the tanh is applied after pooling)
    conv = Convolution1D(
        nb_filter=NB_FILTER,
        filter_length=FILTER_LENGTH,
        border_mode='same',
        subsample_length=1,
        W_constraint=maxnorm(3),
        b_constraint=maxnorm(3),
    )
    convolved = conv(embedded)

    # maxpooling
    maxpool = Lambda(lambda x: K.max(x, axis=1, keepdims=False),
                     output_shape=lambda x: (x[0], x[2]))
    pooled = maxpool(convolved)

    # activation
    return Activation('tanh')(pooled)
def sigmoid_cross_entropy(y_true, y_pred):
    """Positive-weighted sigmoid cross-entropy on logits, summed and scaled.

    Implements the numerically stable form

        (1 - z) * x + l * (log(1 + exp(-|x|)) + max(-x, 0))

    with ``l = 1 + (q - 1) * z`` and positive weight ``q = 10``, where ``z``
    is the flattened target and ``x`` the flattened logit.  The summed loss
    is divided by the constant 500.
    """
    z = K.flatten(y_true)
    x = K.flatten(y_pred)
    q = 10  # weight applied to positive targets
    pos_weight = 1 + (q - 1) * z
    # max(-x, 0) must be element-wise: K.maximum, not K.max (which treated
    # the 0 as a reduction axis and collapsed the whole vector to one value).
    stable_term = K.log(1 + K.exp(-K.abs(x))) + K.maximum(-x, 0.)
    loss = (K.sum((1 - z) * x) + K.sum(pos_weight * stable_term)) / 500
    return loss
def doubleFeature(self, pos, neg, conv_name, activation='tanh'):
    """Build pooled features from the summed convolutions of two tensors.

    For every convolution in ``self.layers[conv_name]`` the layer is applied
    to both ``self.tensors[pos]`` and ``self.tensors[neg]``; the two outputs
    are summed, activated and reduced with a max over axis 1.

    Args:
        pos, neg: Keys into ``self.tensors``.
        conv_name: Key into ``self.layers`` yielding the convolution layers.
        activation: Activation name (string) or a callable that builds an
            activation layer when given ``name=``.

    Returns:
        The single pooled feature if there is only one convolution,
        otherwise the concatenation of all pooled features.
    """
    name = '%s+%s' % (pos, neg)
    filters = self.params['filters']
    nb_filter = self.params['nb_filter']
    convs = self.layers[conv_name]
    features = []
    pos_tensor = self.tensors[pos]
    neg_tensor = self.tensors[neg]
    for fsz, conv in zip(filters, convs):
        summed = Merge(
            mode='sum',
        )([conv(pos_tensor), conv(neg_tensor)])
        if isinstance(activation, str):
            # The layer name used to be built from an undefined
            # ``input_names`` variable, which raised NameError on this path;
            # ``name`` is the already-joined "pos+neg" label.
            act = Activation(
                activation, name='%s-act-%d' % (name, fsz)
            )(summed)
        else:
            act = activation(
                name='%s-advanced-act-%d' % (name, fsz)
            )(summed)
        maxpool = Lambda(
            lambda x: K.max(x, axis=1),
            output_shape=(nb_filter,),
            name='%s-maxpool-%d' % (name, fsz)
        )(act)
        print('maxpool', maxpool._keras_shape)
        features.append(maxpool)
    if len(features) > 1:
        return Merge(
            mode='concat',
            name='%s-feature' % name,
        )(features)
    return features[0]
def build(self):
    """Encode question and answer by max-pooling their shared embeddings.

    Pipeline for both inputs: embedding -> ``Dropout(0.5)`` -> max over
    axis 1 -> ``tanh``.  All four layers are shared between the two inputs.

    Returns:
        tuple: ``(question_output, answer_output)``.
    """
    question = self.question
    answer = self.get_answer()

    # Shared layers.
    embedding = Embedding(self.config['n_words'],
                          self.model_params.get('n_embed_dims', 141))
    q_vec = embedding(question)
    a_vec = embedding(answer)

    dropout = Dropout(0.5)
    q_vec = dropout(q_vec)
    a_vec = dropout(a_vec)

    maxpool = Lambda(lambda x: K.max(x, axis=1, keepdims=False),
                     output_shape=lambda x: (x[0], x[2]))
    q_vec = maxpool(q_vec)
    a_vec = maxpool(a_vec)

    activation = Activation('tanh')
    question_output = activation(q_vec)
    answer_output = activation(a_vec)
    return question_output, answer_output
def build(self):
    """Build the shared dense + multi-width CNN encoder with linear output.

    Both inputs pass through the same layers: embedding (optionally
    initialised from ``model_params['initial_embed_weights']``), dropout,
    time-distributed dense + dropout, four parallel convolutions (filter
    lengths 2, 3, 5, 7) concatenated, dropout, max over axis 1, a linear
    activation and a final dropout.

    Returns:
        tuple: ``(question_output, answer_output)``.
    """
    assert self.config['question_len'] == self.config['answer_len']

    question = self.question
    answer = self.get_answer()

    # add embedding layers
    weights = self.model_params.get('initial_embed_weights', None)
    if weights is not None:
        weights = [weights]
    embedding = Embedding(input_dim=self.config['n_words'],
                          output_dim=self.model_params.get('n_embed_dims', 100),
                          weights=weights)
    q_seq = embedding(question)
    a_seq = embedding(answer)

    # dropout (one instance reused for every sequence-level dropout)
    dropout = Dropout(0.5)
    q_seq = dropout(q_seq)
    a_seq = dropout(a_seq)

    # dense, followed by dropout
    dense = TimeDistributed(Dense(self.model_params.get('n_hidden', 200),
                                  activation='tanh'))
    q_seq = dropout(dense(q_seq))
    a_seq = dropout(dense(a_seq))

    # cnn
    nb_filters = self.model_params.get('nb_filters', 1000)
    conv_activation = self.model_params.get('conv_activation', 'relu')
    cnns = []
    for filter_length in [2, 3, 5, 7]:
        cnns.append(Convolution1D(filter_length=filter_length,
                                  nb_filter=nb_filters,
                                  activation=conv_activation,
                                  border_mode='same'))
    q_seq = merge([cnn(q_seq) for cnn in cnns], mode='concat')
    a_seq = merge([cnn(a_seq) for cnn in cnns], mode='concat')

    # dropout
    q_seq = dropout(q_seq)
    a_seq = dropout(a_seq)

    # maxpooling (the average-pooling layer is created but not applied)
    maxpool = Lambda(lambda x: K.max(x, axis=1, keepdims=False),
                     output_shape=lambda x: (x[0], x[2]))
    avepool = Lambda(lambda x: K.mean(x, axis=1, keepdims=False),
                     output_shape=lambda x: (x[0], x[2]))
    question_pool = maxpool(q_seq)
    answer_pool = maxpool(a_seq)

    # activation, then a separate dropout layer on the pooled vectors
    larger_dropout = Dropout(0.5)
    activation = Activation('linear')
    question_output = larger_dropout(activation(question_pool))
    answer_output = larger_dropout(activation(answer_pool))

    return question_output, answer_output
def get_output(self, train=False):
    """Return the input, keeping only per-map maxima when training.

    In training mode every entry that is not equal to the maximum over axes
    (2, 3) of its slice is replaced by ``0.``; otherwise the input is passed
    through unchanged.
    """
    X = self.get_input(train)
    if not train:
        return X
    # Maximum over axes 2 and 3, kept broadcastable against X.
    peak = K.max(X, axis=(2, 3), keepdims=True)
    is_peak = K.equal(X, peak)
    return K.switch(is_peak, X, 0.)
def mask_aware_max(x):
    """Max over axis 1 that ignores positions whose axis-2 vector is all zeros.

    A position counts as masked when the sum of absolute values over axis 2
    is zero.  Masked positions get the minimum over axis 1 added to them
    (they are all-zero, so they become that minimum) and therefore cannot
    exceed any unmasked value in the max.
    """
    has_content = K.not_equal(K.sum(K.abs(x), axis=2, keepdims=True), 0)
    keep = K.cast(has_content, 'float32')
    floor = K.min(x, axis=1, keepdims=True)
    # Masked positions are lifted/lowered to the minimum.
    adjusted = x + (floor * (1 - keep))
    return K.max(adjusted, axis=1, keepdims=False)
def build_loss(self):
    """Return the L-p norm of ``self.img`` passed through ``normalize``.

    For infinite ``self.p`` the infinity norm ``max(|img|)`` is used;
    otherwise ``(sum(|img| ** p)) ** (1 / p)``.
    """
    if np.isinf(self.p):
        # Infinity norm: the largest absolute value.  Taking the max of the
        # raw values would ignore large negative entries and disagree with
        # the finite-p branch, which uses absolute values.
        value = K.max(K.abs(self.img))
    else:
        value = K.pow(K.sum(K.pow(K.abs(self.img), self.p)), 1. / self.p)

    return normalize(self.img, value)
def call(self, x, mask=None):
    """Pool every region of interest to a ``pool_size`` x ``pool_size`` grid.

    Args:
        x: two-element sequence ``[img, rois]``; ``rois[0, i]`` holds the
            ``(x, y, w, h)`` values of the i-th region.
        mask: unused.

    Returns:
        A tensor reshaped to ``(1, num_rois, pool_size, pool_size,
        nb_channels)`` and, for ``'th'`` ordering, permuted to put the
        channel axis before the two spatial axes.
    """
    assert (len(x) == 2)
    img, rois = x[0], x[1]
    img_shape = K.shape(img)
    pooled = []

    for roi_idx in range(self.num_rois):
        roi_x = rois[0, roi_idx, 0]
        roi_y = rois[0, roi_idx, 1]
        roi_w = rois[0, roi_idx, 2]
        roi_h = rois[0, roi_idx, 3]

        step_x = roi_w / float(self.pool_size)
        step_y = roi_h / float(self.pool_size)
        cells = self.pool_size

        # NOTE: the RoiPooling implementation differs between theano and
        # tensorflow due to the lack of a resize op in theano. The theano
        # implementation is much less efficient and leads to long compile
        # times.
        if self.dim_ordering == 'th':
            for jy in range(cells):
                for ix in range(cells):
                    left = roi_x + ix * step_x
                    right = left + step_x
                    top = roi_y + jy * step_y
                    bottom = top + step_y

                    left = K.cast(left, 'int32')
                    right = K.cast(right, 'int32')
                    top = K.cast(top, 'int32')
                    bottom = K.cast(bottom, 'int32')

                    # Every cell spans at least one pixel in each direction.
                    right = left + K.maximum(1, right - left)
                    bottom = top + K.maximum(1, bottom - top)

                    cell_shape = [
                        img_shape[0], img_shape[1],
                        bottom - top, right - left
                    ]
                    cell = K.reshape(img[:, :, top:bottom, left:right],
                                     cell_shape)
                    pooled.append(K.max(cell, axis=(2, 3)))
        elif self.dim_ordering == 'tf':
            roi_x = K.cast(roi_x, 'int32')
            roi_y = K.cast(roi_y, 'int32')
            roi_w = K.cast(roi_w, 'int32')
            roi_h = K.cast(roi_h, 'int32')

            resized = tf.image.resize_images(
                img[:, roi_y:roi_y + roi_h, roi_x:roi_x + roi_w, :],
                (self.pool_size, self.pool_size))
            pooled.append(resized)

    stacked = K.concatenate(pooled, axis=0)
    stacked = K.reshape(stacked, (1, self.num_rois, self.pool_size,
                                  self.pool_size, self.nb_channels))

    # 'th' moves channels ahead of the spatial axes; otherwise the
    # permutation is the identity.
    axis_order = ((0, 1, 4, 2, 3) if self.dim_ordering == 'th'
                  else (0, 1, 2, 3, 4))
    return K.permute_dimensions(stacked, axis_order)
def model_conv(emb1, emb3, emb6, emb7, num_feature_input):
    """Build and compile the four-embedding BiLSTM classifier.

    Args:
        emb1, emb3, emb6, emb7: embedding matrices; each one initialises a
            frozen ``Embedding`` layer sized from its ``shape``.
        num_feature_input: width of the additional dense feature input.

    Returns:
        A compiled Keras ``Model`` taking four length-95 sequences plus the
        feature vector and producing a 2-way softmax.
    """
    K.clear_session()

    seq_len = 95
    matrices = (emb1, emb3, emb6, emb7)

    # One frozen embedding layer and one sequence input per matrix.
    emb_layers = [
        Embedding(
            input_dim=matrix.shape[0],
            output_dim=matrix.shape[1],
            weights=[matrix],
            input_length=seq_len,
            trainable=False
        )
        for matrix in matrices
    ]
    seq_inputs = [Input(shape=(seq_len,)) for _ in matrices]
    embedded = [layer(seq) for layer, seq in zip(emb_layers, seq_inputs)]

    sdrop = SpatialDropout1D(rate=0.2)
    embedded = [sdrop(tensor) for tensor in embedded]

    # Main network structure.
    x13 = concatenate(embedded, axis=-1)
    drop_a = Dropout(0.35)
    x13 = drop_a(Bidirectional(CuDNNLSTM(320, return_sequences=True))(x13))
    x13 = Dense(300, activation='relu')(x13)

    semantic13 = TimeDistributed(Dense(128, activation="tanh"))(x13)
    merged_13 = Lambda(lambda x: K.max(x, axis=1),
                       output_shape=(128,))(semantic13)
    merged_13_avg = Lambda(lambda x: K.mean(x, axis=1),
                           output_shape=(128,))(semantic13)

    drop_b = Dropout(0.3)
    recurrent = drop_b(
        Bidirectional(CuDNNLSTM(300, return_sequences=True))(x13))
    att_1 = Attention(seq_len)(recurrent)
    att_1 = Dense(128, activation='relu')(att_1)

    # Dense branch for the extra features.
    hin = Input(shape=(num_feature_input, ))
    htime = Dense(320, activation='relu')(hin)
    htime = Dense(160, activation='relu')(htime)

    head = concatenate([att_1, merged_13, merged_13_avg, htime])
    drop_c = Dropout(0.4)
    act_a = Activation(activation="relu")
    head = drop_c(act_a(BatchNormalization()(Dense(1300)(head))))
    act_b = Activation(activation="relu")
    head = act_b(BatchNormalization()(Dense(600)(head)))
    pred = Dense(2, activation='softmax')(head)

    model = Model(inputs=seq_inputs + [hin], outputs=pred)
    model.compile(loss='categorical_crossentropy',
                  optimizer=AdamW(lr=0.001, weight_decay=0.06,),
                  metrics=["accuracy"])
    return model
def softmaxNd(x, axis=-1):
    """Softmax of ``x`` along ``axis``.

    The maximum along ``axis`` is subtracted before exponentiating.
    """
    shifted = x - K.max(x, axis=axis, keepdims=True)
    numerator = K.exp(shifted)
    denominator = K.sum(numerator, axis=axis, keepdims=True)
    return numerator / denominator
def yolo_loss(args,
              anchors,
              num_classes,
              rescore_confidence=False,
              print_loss=False):
    """YOLO localization loss function.

    Parameters
    ----------
    args : sequence of four tensors/arrays, unpacked in this order:
        yolo_output : tensor
            Final convolutional layer features.
        true_boxes : tensor
            Ground truth boxes tensor with shape [batch, num_true_boxes, 5]
            containing box x_center, y_center, width, height, and class.
        detectors_mask : array
            0/1 mask for detector positions where there is a matching ground
            truth.
        matching_true_boxes : array
            Corresponding ground truth boxes for positive detector positions.
            Already adjusted for conv height and width.
    anchors : tensor
        Anchor boxes for model.
    num_classes : int
        Number of object classes.
    rescore_confidence : bool, default=False
        If true then set confidence target to IOU of best predicted box with
        the closest matching ground truth box.
    print_loss : bool, default=False
        If True then use a tf.Print() to print the loss components.

    Returns
    -------
    total_loss : tensor
        0.5 * (confidence + classification + coordinate loss), where each
        component is summed (not averaged) over all of its elements.
    """
    (yolo_output, true_boxes, detectors_mask, matching_true_boxes) = args
    num_anchors = len(anchors)
    # Fixed weights of the individual loss components.
    object_scale = 5
    no_object_scale = 1
    class_scale = 1
    coordinates_scale = 1
    pred_xy, pred_wh, pred_confidence, pred_class_prob = yolo_head(
        yolo_output, anchors, num_classes)

    # Unadjusted box predictions for loss.
    # TODO: Remove extra computation shared with yolo_head.
    yolo_output_shape = K.shape(yolo_output)
    feats = K.reshape(yolo_output, [
        -1, yolo_output_shape[1], yolo_output_shape[2], num_anchors,
        num_classes + 5
    ])
    # Sigmoid is applied to the first two box params only; the next two
    # are kept raw.
    pred_boxes = K.concatenate(
        (K.sigmoid(feats[..., 0:2]), feats[..., 2:4]), axis=-1)

    # TODO: Adjust predictions by image width/height for non-square images?
    # IOUs may be off due to different aspect ratio.

    # Expand pred x,y,w,h to allow comparison with ground truth.
    # batch, conv_height, conv_width, num_anchors, num_true_boxes, box_params
    pred_xy = K.expand_dims(pred_xy, 4)
    pred_wh = K.expand_dims(pred_wh, 4)

    pred_wh_half = pred_wh / 2.
    pred_mins = pred_xy - pred_wh_half
    pred_maxes = pred_xy + pred_wh_half

    true_boxes_shape = K.shape(true_boxes)

    # batch, conv_height, conv_width, num_anchors, num_true_boxes, box_params
    true_boxes = K.reshape(true_boxes, [
        true_boxes_shape[0], 1, 1, 1, true_boxes_shape[1], true_boxes_shape[2]
    ])
    true_xy = true_boxes[..., 0:2]
    true_wh = true_boxes[..., 2:4]

    # Find IOU of each predicted box with each ground truth box.
    true_wh_half = true_wh / 2.
    true_mins = true_xy - true_wh_half
    true_maxes = true_xy + true_wh_half

    intersect_mins = K.maximum(pred_mins, true_mins)
    intersect_maxes = K.minimum(pred_maxes, true_maxes)
    # Clamp at 0 so non-overlapping boxes yield zero intersection area.
    intersect_wh = K.maximum(intersect_maxes - intersect_mins, 0.)
    intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1]

    pred_areas = pred_wh[..., 0] * pred_wh[..., 1]
    true_areas = true_wh[..., 0] * true_wh[..., 1]

    union_areas = pred_areas + true_areas - intersect_areas
    # NOTE(review): no epsilon in the denominator; a zero union area would
    # divide by zero -- confirm inputs rule this out.
    iou_scores = intersect_areas / union_areas

    # Best IOUs for each location.
    best_ious = K.max(iou_scores, axis=4)  # Best IOU scores.
    best_ious = K.expand_dims(best_ious)

    # A detector has found an object if IOU > thresh for some true box.
    object_detections = K.cast(best_ious > 0.6, K.dtype(best_ious))

    # TODO: Darknet region training includes extra coordinate loss for early
    # training steps to encourage predictions to match anchor priors.

    # Determine confidence weights from object and no_object weights.
    # NOTE: YOLO does not use binary cross-entropy here.
    no_object_weights = (no_object_scale * (1 - object_detections) *
                         (1 - detectors_mask))
    no_objects_loss = no_object_weights * K.square(-pred_confidence)

    if rescore_confidence:
        objects_loss = (object_scale * detectors_mask *
                        K.square(best_ious - pred_confidence))
    else:
        objects_loss = (object_scale * detectors_mask *
                        K.square(1 - pred_confidence))
    confidence_loss = objects_loss + no_objects_loss

    # Classification loss for matching detections.
    # NOTE: YOLO does not use categorical cross-entropy loss here.
    matching_classes = K.cast(matching_true_boxes[..., 4], 'int32')
    matching_classes = K.one_hot(matching_classes, num_classes)
    classification_loss = (class_scale * detectors_mask *
                           K.square(matching_classes - pred_class_prob))

    # Coordinate loss for matching detection boxes.
    matching_boxes = matching_true_boxes[..., 0:4]
    coordinates_loss = (coordinates_scale * detectors_mask *
                        K.square(matching_boxes - pred_boxes))

    confidence_loss_sum = K.sum(confidence_loss)
    classification_loss_sum = K.sum(classification_loss)
    coordinates_loss_sum = K.sum(coordinates_loss)
    total_loss = 0.5 * (
        confidence_loss_sum + classification_loss_sum + coordinates_loss_sum)
    if print_loss:
        # Wrap the loss in a print op that logs the total and each component.
        total_loss = tf.Print(
            total_loss, [
                total_loss, confidence_loss_sum, classification_loss_sum,
                coordinates_loss_sum
            ],
            message='yolo_loss, conf_loss, class_loss, box_coord_loss:')

    return total_loss
def mean_q(y_true, y_pred):
    """Metric: mean of the largest predicted value along the last axis.

    ``y_true`` is accepted for the metric signature but not used.
    """
    best_per_row = K.max(y_pred, axis=-1)
    return K.mean(best_per_row)
def log_sum_exp(x, axis=None):
    """Log-sum-exp of ``x`` along ``axis`` using the max-shift trick.

    The reduced axis is kept (``keepdims=True``) in the result.
    """
    peak = K.max(x, axis=axis, keepdims=True)
    shifted_total = K.sum(K.exp(x - peak), axis=axis, keepdims=True)
    return K.log(shifted_total) + peak
def loop_body(b, ignore_mask):
    """Loop step: write the ignore mask for batch item ``b``.

    Reads ``y_true``, ``l``, ``object_mask_bool``, ``pred_box`` and
    ``ignore_thresh`` from the enclosing scope. A prediction is flagged (1)
    when its best IOU against the item's ground-truth boxes is below
    ``ignore_thresh``.

    Returns:
        ``(b + 1, ignore_mask)`` with entry ``b`` written.
    """
    coords = y_true[l][b, ..., 0:4]
    has_object = object_mask_bool[b, ..., 0]
    gt_boxes = tf.boolean_mask(coords, has_object)
    overlaps = box_iou(pred_box[b], gt_boxes)
    top_overlap = K.max(overlaps, axis=-1)
    below_thresh = K.cast(top_overlap < ignore_thresh, K.dtype(gt_boxes))
    ignore_mask = ignore_mask.write(b, below_thresh)
    return b + 1, ignore_mask
# NOTE(review): several defaults below are mutable objects (list(), [..],
# dict(..)) created once at definition time. The body does not appear to
# mutate them, but callers should not rely on that.
def deep_er_model_generator(data_dict,
                            embedding_file,
                            padding_limit = 100,
                            mask_zero = True,
                            embedding_trainable = False,
                            text_columns = list(),
                            numeric_columns_1D = list(),
                            numeric_columns_2D = list(),
                            category_num_cols = list(),
                            image_url_cols = list(),
                            make_isna = True,
                            text_nan_idx = None,
                            num_nan_val = None,
                            text_compositions = ['hybrid'],
                            text_sim_metrics = ['cosine'],
                            image_sim_metrics = ['cosine'],
                            numeric_sim_metrics = ['unscaled_inverse_lp'],
                            dense_nodes = [10],
                            lstm_args = dict(units=50),
                            document_frequencies = None,
                            idf_smoothing = 2,
                            batch_norm = False,
                            dropout = 0,
                            shared_lstm = True,
                            debug = False):
    """
    Takes a dictionary of paired split DataFrames and returns a DeepER model
    with data formatted for said model.

    Parameters
    ----------
    data_dict : dict
        A dictionary of dataframes (pd.DataFrame) stored with the following
        keys: train_1, val_1, test_1, train_2, val_2, test_2. The keys
        train_y, val_y and test_y are also read for the labels.
    embedding_file : str
        The location and name of numpy matrix containing word vector
        embeddings.
    padding_limit : int, optional
        The maximum length of any text sequence. For any text attribute whose
        max length is below padding_limit, the max length will be used.
        Otherwise, padding_limit will be used to both pad and truncate
        text sequences for that attribute.
    mask_zero : bool, optional
        Whether to ignore text sequence indices with value of 0. Useful for
        LSTM's and variable length inputs.
    embedding_trainable : bool, optional
        Whether to allow the embedding layer to be fine tuned.
    text_columns : list of strings, optional
        A list of names of text-based attributes. The first entry is used
        unconditionally for the matching layers.
    numeric_columns_1D, numeric_columns_2D, category_num_cols : list of
        strings, optional
        Names of numeric attributes; the three lists are concatenated and
        handled identically here.
    image_url_cols : list of strings, optional
        Names of image attributes. The first entry is used unconditionally
        for the matching layers.
    make_isna : bool, optional
        Whether to create new attributes indicating the presence of null
        values for each original attribute.
    text_nan_idx : int, optional
        The index corresponding to NaN values in text-based attributes.
    num_nan_val : int, optional
        The value corresponding to NaN values in numeric attributes.
    text_compositions : list of strings, optional
        Only checked for 'lstm ' / 'bi_lstm' when computing a printed count.
    text_sim_metrics, image_sim_metrics : list of strings, optional
        Not read by the current body.
    numeric_sim_metrics : list of strings, optional
        List of similarity metrics to be computed for each numeric attribute.
        Valid options are :
            - scaled_inverse_lp : e^[-2(abs_diff)/sum]
            - unscaled_inverse_lp : e^[-abs_diff]
            - min_max_ratio : min / max
    dense_nodes : list of ints, optional
        Specifies topology of hidden dense layers (each layer gets
        2 * n_nodes units).
    lstm_args : dict, optional
        Only ``lstm_args['units']`` is read, for a printed count.
    document_frequencies, idf_smoothing, shared_lstm : optional
        Not read by the current body.
    batch_norm : bool, optional
    dropout : float, optional
        Batch normalization followed by dropout is applied after each hidden
        dense layer only when both are truthy.
    debug : bool, optional
        If True, the model outputs the embedded left text tensor instead of
        the softmax prediction.

    Returns
    -------
    tuple
        (model, X_train, X_val, X_test, y_train, y_val, y_test), where each
        X is a list of arrays ordered by side then column, followed by the
        ``_isna`` arrays when make_isna is set.
    """

    ### DATA PROCESSING ###
    # initialize an empty dictionary for storing all data
    # dictionary structure will be data[split][side][column]
    sides = ['left', 'right']
    splits = ['train', 'val', 'test']
    data = dict()
    for split in splits:
        data[split] = dict()
        for side in sides:
            data[split][side] = dict()

    numeric_columns = numeric_columns_1D+numeric_columns_2D+category_num_cols
    columns = text_columns + numeric_columns+image_url_cols
    # separate each feature into its own dictionary entry
    # validation data is truncated to 5000 rows and test data to 1000 rows
    for column in columns:
        data['train']['left'][column] = data_dict['train_1'][column]
        data['train']['right'][column] = data_dict['train_2'][column]

        data['val']['left'][column] = data_dict['val_1'][column][:5000]
        data['val']['right'][column] = data_dict['val_2'][column][:5000]

        data['test']['left'][column] = data_dict['test_1'][column][:1000]
        data['test']['right'][column] = data_dict['test_2'][column][:1000]
    import numpy as np
    # NOTE(review): load_image and tqdm are not used by any live code below.
    def load_image(x):
        x = np.load('./imagess'+x[1:]+'.npy')
        return x
    from tqdm import tqdm

    # if enabled, create a binary column for each feature indicating whether
    # it contains a missing value. for text data, this will be a list with
    # a single index representing the 'NaN' token. for numeric data, this will
    # likely be a 0.
    if make_isna:
        for split, side, column in it.product(splits, sides, text_columns):
            isna = data[split][side][column].apply(lambda x: x == [text_nan_idx])
            isna = isna.values.astype(np.float32).reshape(-1, 1)
            isna_column = column + '_isna'
            data[split][side][isna_column] = isna
        for split, side, column in it.product(splits, sides, numeric_columns):
            isna = data[split][side][column].apply(lambda x: x == num_nan_val)
            isna_column = column + '_isna'
            isna = isna.values.astype(np.float32).reshape(-1, 1)
            data[split][side][isna_column] = isna

    # pad each text column according to the length of its longest entry in
    # both datasets
    maxlen = dict()
    import numpy as np
    for column in text_columns:
        # float entries (presumably NaN -- TODO confirm) count as length 1
        maxlen_left = data['train']['left'][column].apply(lambda x: len(x) if type(x) != float else len([x])).max()
        maxlen_right = data['train']['right'][column].apply(lambda x: len(x) if type(x) != float else len([x])).max()
        print(maxlen_left, maxlen_right )
        maxlength = min(padding_limit, max(maxlen_left, maxlen_right))
        for split, side in it.product(splits, sides):
            # NOTE(review): `x == np.nan` is always False, so this apply
            # never substitutes []; the intent was probably a NaN check.
            data[split][side][column] = data[split][side][column].apply(lambda x: [] if x == np.nan else x)
            data[split][side][column] = pad_sequences(data[split][side][column],maxlen=maxlength,padding='post',truncating='post')
        maxlen[column] = maxlength
    # persist the per-column padded lengths to the working directory
    with open('./maxlen.map', 'wb') as f:
        pkl.dump(maxlen, f)
    # convert all numeric features to float and reshape to be 2-dimensional
    numeric_columns = numeric_columns_1D + numeric_columns_2D + category_num_cols
    for split, side, column in it.product(splits, sides, numeric_columns):
        feature = data[split][side][column]
        feature = feature.values.astype(np.float32).reshape(-1,1)
        data[split][side][column] = feature

    # format X values for each split as a list of 2-dimensional arrays
    packaged_data = OrderedDict()
    for split in splits:
        packaged_data[split] = list()
        for side, column in it.product(sides, columns):
            packaged_data[split].append(data[split][side][column])
        if make_isna:
            # NOTE(review): '_isna' entries are only created above for text
            # and numeric columns, but `columns` also contains
            # image_url_cols; with make_isna and any image column this
            # lookup raises KeyError.
            for side, column in it.product(sides, columns):
                packaged_data[split].append(data[split][side][column + '_isna'])

    # convert y-values
    y_train = to_categorical(data_dict['train_y'])
    y_val = to_categorical(data_dict['val_y'])[:5000]
    y_test = to_categorical(data_dict['test_y'])[:1000]

    ### MODEL BUILDING ###

    # each attribute of each side is its own input tensor
    # text input tensors for both sides are created before numeric input tensors
    input_tensors = dict(left=dict(), right=dict())
    for side, column in it.product(sides, text_columns):
        input_tensors[side][column] = Input(shape=(maxlen[column],))

    for side, column in it.product(sides, numeric_columns):
        input_tensors[side][column] = Input(shape=(1,))

    for side, column in it.product(sides, image_url_cols):
        # NOTE(review): the name depends only on the side, so more than one
        # image column would produce duplicate input names.
        input_tensors[side][column] = Input(shape=(1,64,2048, ),name='image'+str(side) )

    # create embedding layer for image features
    from keras.models import Sequential
    from keras.layers import Dense, Activation
    from keras.layers.core import Flatten

    highwaynet_img = Lambda(lambda x: highway_layers(x,2))
    embedding_layer_image= Sequential([Dense(300, activation='relu'), highwaynet_img])
    # drops the size-1 axis at position 1 of the image tensors
    squeeze_layer = Lambda(lambda x: tf.squeeze(x, [1]))
    embedded_tensors = dict(left=dict(), right=dict())
    for side, column in it.product(sides, image_url_cols):
        embedded_tensors[side][column] = embedding_layer_image(input_tensors[side][column])
        embedded_tensors[side][column] = squeeze_layer( embedded_tensors[side][column])

    # for each attribute, calculate similarities between left and right sides
    similarity_tensors = list()

    # create a single embedding layer for text input tensors
    embedding_matrix = np.load(embedding_file)
    embedding_layer = Sequential()
    embedding_layer.add(Embedding(embedding_matrix.shape[0],
                                  embedding_matrix.shape[1],
                                  weights=[embedding_matrix],
                                  trainable=embedding_trainable,
                                  mask_zero=mask_zero) )

    # use embedding_layer to convert text input tensors to embedded tensors
    # and store in dictionary.
    # an embedding tensor will have shape n_words x n_embedding_dimensions
    directionals = ['positive', 'inverse']  # NOTE(review): unused
    for side, column in it.product(sides, text_columns):
        embedded_tensors[side][column] = embedding_layer(input_tensors[side][column])

    text_img_tensors = list()
    def bidirect_layer( in_question_words, in_passage_words, ll=50):
        # ========Bilateral Matching=====
        # matrix_cor is defined outside this function; its semantics are not
        # visible here.
        matrix = matrix_cor(ll)
        matrix.build( 2*ll )
        match_representation = bilateral_match_func(in_question_words, in_passage_words, matrix)
        return match_representation
    def bilateral_match_func(in_question_repres, in_passage_repres, matrix):
        # Only the question -> passage direction is computed.
        match_representation_1 = uni_match_func(in_question_repres, in_passage_repres, matrix)
        return match_representation_1
    def uni_match_func(in_question_repres, in_passage_repres, matrix, units = 50, ll=50):
        gru_question_layer = gru( units, directional=True, return_sequences= True) #[batch_size, number of word or imagevector, 2h]
        gru_passage_layer = gru( units,directional=True, return_sequences= False)
        gru_question = gru_question_layer(in_question_repres)
        gru_passage = gru_passage_layer(in_passage_repres)
        # replicate each question step ll times before applying `matrix`
        gru_question = tf.expand_dims(gru_question, 2)
        gru_question = tf.tile(input=gru_question, multiples=[1, 1, ll, 1])
        gru_question = matrix(gru_question)
        print(K.int_shape(gru_question))
        gru_question = tf.reshape(gru_question, [-1, tf.shape(in_question_repres)[1], 2*ll, units])
        # replicate the passage summary ll times, apply `matrix`, then tile
        # it along the question's step axis
        gru_passage = tf.expand_dims(gru_passage, 1)
        gru_passage = tf.tile(input=gru_passage, multiples=[1, ll, 1])
        gru_passage = matrix(gru_passage)
        gru_passage = tf.expand_dims(gru_passage, 1)
        gru_passage = tf.tile(input=gru_passage, multiples=[1, tf.shape(gru_question)[1], 1, 1])
        gru_passage = tf.reshape(gru_passage, [-1, tf.shape(in_question_repres)[1], 2*ll, units])
        # cosine similarity over the last axis; the epsilon avoids division
        # by zero
        unscale_cos = tf.reduce_sum(tf.multiply(gru_passage, gru_question), -1)
        norm_question = tf.sqrt(tf.reduce_sum(tf.square(gru_question), -1))
        norm_passage = tf.sqrt(tf.reduce_sum(tf.square(gru_passage), -1))
        cos_m = unscale_cos / (norm_passage * norm_question + 0.0000001)
        # aggregate the similarity sequence into a single vector
        gru_agg_layer = gru( units,directional=True, return_sequences= False)
        summary_vector = gru_agg_layer(cos_m)
        return summary_vector

    mbm_layer = Lambda(lambda x: bidirect_layer(x[0], x[1]))
    mbm_layer_1 = Lambda(lambda x: bidirect_layer(x[0], x[1]))
    mbm_layer_2 = Lambda(lambda x: bidirect_layer(x[0], x[1]))
    # four matches: text-text, left text-right image, right text-left image,
    # image-image. Requires at least one text column and one image column.
    text_img_tensors.append(mbm_layer([embedded_tensors['left'][text_columns[0]], embedded_tensors['right'][text_columns[0]] ]))
    print(K.int_shape( embedded_tensors['right'][image_url_cols[0]] ))
    text_img_tensors.append( mbm_layer_1([embedded_tensors['left'][text_columns[0]], embedded_tensors['right'][image_url_cols[0]] ]))
    text_img_tensors.append( mbm_layer_1([embedded_tensors['right'][text_columns[0]], embedded_tensors['left'][image_url_cols[0]] ]) )
    text_img_tensors.append( mbm_layer_2([embedded_tensors['left'][image_url_cols[0]], embedded_tensors['right'][image_url_cols[0]] ]) )

    composed_tensors = dict()
    composed_tensors['vbi'] = Concatenate(axis=-1)(text_img_tensors)
    similarity_tensors.append(composed_tensors['vbi'] )
    # reset similarity layer to empty so only numeric-based similarities are used
    similarity_layers = list()
    if 'scaled_inverse_lp' in numeric_sim_metrics:
        similarity_layer = Lambda(lambda x: K.exp(-2 * K.abs(x[0]-x[1]) / (x[0] + x[1] + 1e-5)))
        similarity_layers.append(similarity_layer)
    if 'unscaled_inverse_lp' in numeric_sim_metrics:
        similarity_layer = Lambda(lambda x: K.exp(-K.abs(x[0]-x[1])))
        similarity_layers.append(similarity_layer)

    for column, similarity_layer in it.product(numeric_columns, similarity_layers):
        similarity_tensor = similarity_layer([input_tensors['left'][column], input_tensors['right'][column]])
        similarity_tensors.append(similarity_tensor)
    if 'min_max_ratio' in numeric_sim_metrics:
        for column in numeric_columns:
            num_concat = Concatenate(axis=-1)([input_tensors['left'][column], input_tensors['right'][column]])
            similarity_layer = Lambda(lambda x: K.min(x, axis=1, keepdims=True) / \
                                                (K.max(x, axis=1, keepdims=True) + 1e-5))
            similarity_tensors.append(similarity_layer(num_concat))

    # create input tensors from _isna attributes
    input_isna_tensors = list()
    if make_isna:
        for side, column in it.product(sides, columns):
            input_isna_tensors.append(Input(shape=(1,)))

    # this count is only printed; it does not size any layer
    num_dense_inputs = len(similarity_tensors) + len(input_isna_tensors)
    # NOTE(review): 'lstm ' has a trailing space, so a 'lstm' entry in
    # text_compositions would not match -- probably a typo.
    if 'lstm ' in text_compositions or 'bi_lstm' in text_compositions:
        num_dense_inputs += lstm_args['units'] * len(text_columns)
    print('Number of inputs to dense layer: {}'.format(num_dense_inputs))
    # NOTE(review): only the similarity tensors are concatenated; the isna
    # input tensors are passed to Model() below but not connected here.
    concatenated_tensors = Concatenate(axis=-1)(similarity_tensors )

    # create dense layers starting with concatenated tensors
    dense_tensors = [concatenated_tensors]
    print(keras.backend.int_shape(concatenated_tensors), '***************************************')
    for n_nodes in dense_nodes:
        # NOTE(review): the fixed names 'output' / 'batchnormal' are reused
        # on every iteration; more than one entry in dense_nodes would
        # create layers with duplicate names.
        fc = Dense(2*n_nodes, activation='relu', name='output')
        dense_tensor = fc(dense_tensors[-1])
        if batch_norm and dropout:
            dense_tensor_bn = BatchNormalization(name='batchnormal')(dense_tensor)
            dense_tensor_dropout = Dropout(dropout)(dense_tensor_bn)
            dense_tensors.append(dense_tensor_dropout)
        else:
            dense_tensors.append(dense_tensor)
    # drop the concatenated input so only hidden-layer outputs remain
    dense_tensors.pop(0)
    # NOTE(review): index 0 is the FIRST hidden layer's output; with several
    # hidden layers the later ones are bypassed ([-1] may have been intended).
    output_tensors = Dense(2, activation='softmax')(dense_tensors[0])
    product = list(it.product(sides, columns))
    if not debug:
        model = Model([input_tensors[s][tc] for s, tc in product] + input_isna_tensors,
                      [output_tensors])
    else:
        model = Model([input_tensors[s][tc] for s, tc in product] + input_isna_tensors,
                      [embedded_tensors['left'][text_columns[0]]])
    return tuple([model] + list(packaged_data.values()) + [y_train, y_val, y_test])
def _global_max_nd(xtens):
    """Per-sample maximum over all non-batch dimensions.

    The tensor is flattened per batch item and reduced along axis 1 with
    ``keepdims=True``.
    """
    return K.max(K.batch_flatten(xtens), 1, keepdims=True)
def pres_acc(y_true, y_pred):
    """Metric: fraction of samples whose rounded peak value matches.

    The peak is the max over the last two axes of each tensor; predicted and
    true peaks are rounded before being compared.
    """
    def _peak(tensor):
        # Reduce the last axis twice, i.e. the two trailing axes.
        return K.max(K.max(tensor, axis=-1), axis=-1)

    target_peak = _peak(y_true)
    predicted_peak = _peak(y_pred)
    matches = K.equal(K.round(predicted_peak), K.round(target_peak))
    return K.mean(matches)
def logsumexp(x, axis=None):
    """Compute ``log(sum(exp(x)))`` along ``axis``.

    The maximum is subtracted before exponentiating and added back after the
    log. The reduced axis is kept in the output.
    """
    offset = K.max(x, axis=axis, keepdims=True)
    exp_total = K.sum(K.exp(x - offset), axis=axis, keepdims=True)
    log_total = K.log(exp_total)
    return log_total + offset
def max_1d(X):
    """Reduce ``X`` by taking the maximum along axis 1."""
    reduced = K.max(X, axis=1)
    return reduced
def call(self, x):
    """Return the maximum of ``x`` along axis 1, dropping that axis."""
    return K.max(x, axis=1, keepdims=False)
def max_norm(x):
    """Divide ``x`` by its maximum along axis 1.

    The maximum is re-expanded on axis 1 before the division.
    """
    row_max = K.max(x, axis=1)
    scale = K.expand_dims(row_max, 1)
    return x / scale
# NOTE(review): the default TrainConfig() instance is created once, when the
# function is defined, and shared by every call that omits train_config.
def _construct_acoustic_model(self, train_config=TrainConfig()):
    """Construct an acoustic model from scratch and store it in self.model.

    The network is: one conv-maxout layer + max pooling + dropout, nine more
    conv-maxout layers (filter count doubled from layer 5 on), a max over
    axis 2, three time-distributed dense-maxout layers with dropout, a
    time-distributed output dense layer, and a Lambda layer wrapping
    ``_ctc_loss``.

    Every weight initializer and every dropout layer is seeded from
    ``self._cur_weight_seed``, which starts at ``self.config.weight_seed``
    and is incremented after each use. The order in which layers are created
    therefore determines their seeds.

    Parameters
    ----------
    train_config : TrainConfig
        Supplies ``dropout``, ``training_stage`` and ``sgd_reg``. L2 kernel
        regularization is added only when ``training_stage == 'sgd'``.
    """
    # construct an acoustic model from scratch
    self._cur_weight_seed = self.config.weight_seed

    def _layer_kwargs():
        # Keyword arguments shared by all weight layers; consumes one seed.
        ret = {
            'activation': 'linear',
            'kernel_initializer': RandomUniform(
                minval=-self.config.weight_init_mag,
                maxval=self.config.weight_init_mag,
                seed=self._cur_weight_seed,
            ),
        }
        self._cur_weight_seed = self._cur_weight_seed + 1
        if train_config.training_stage == 'sgd':
            ret['kernel_regularizer'] = l2(train_config.sgd_reg)
        return ret

    # convolutional layer pattern
    def _conv_maxout_layer(last_layer, n_filts, name_prefix, dropout=True):
        # Two parallel convolutions combined by an element-wise maximum,
        # optionally followed by dropout (which consumes one more seed).
        conv_a = Conv2D(
            n_filts,
            (self.config.filt_time_width, self.config.filt_freq_width),
            strides=(
                self.config.filt_time_stride,
                self.config.filt_freq_stride,
            ),
            padding='same',
            name=name_prefix + '_a',
            **_layer_kwargs()
        )(last_layer)
        conv_b = Conv2D(
            n_filts,
            (self.config.filt_time_width, self.config.filt_freq_width),
            strides=(
                self.config.filt_time_stride,
                self.config.filt_freq_stride,
            ),
            padding='same',
            name=name_prefix + '_b',
            **_layer_kwargs()
        )(last_layer)
        last = Maximum(name=name_prefix + '_m')([conv_a, conv_b])
        # pre-weights (i.e. post max), as per
        # http://jmlr.org/proceedings/papers/v28/goodfellow13.pdf
        if dropout:
            last = Dropout(
                train_config.dropout, name=name_prefix + '_d',
                seed=self._cur_weight_seed)(last)
            self._cur_weight_seed += 1
        return last

    # inputs
    feat_input = Input(
        shape=(
            None,
            self.config.num_feats * (1 + self.config.delta_order),
            1,
        ),
        name='feat_in',
    )
    feat_size_input = Input(
        shape=(1,), dtype='int32', name='feat_size_in')
    label_input = Input(
        shape=(None,), dtype='int32', name='label_in')
    label_size_input = Input(
        shape=(1,), dtype='int32', name='label_size_in')
    last_layer = feat_input
    # convolutional layers
    n_filts = self.config.init_num_filt_channels
    # the first conv layer defers its dropout until after max pooling
    last_layer = _conv_maxout_layer(
        last_layer, n_filts, 'conv_1', dropout=False)
    last_layer = MaxPooling2D(
        pool_size=(
            self.config.pool_time_width,
            self.config.pool_freq_width),
        name='conv_1_p')(last_layer)
    last_layer = Dropout(
        train_config.dropout, name='conv_1_d',
        seed=self._cur_weight_seed)(last_layer)
    self._cur_weight_seed += 1
    for layer_no in range(2, 11):
        # the filter count doubles once, starting with layer 5
        if layer_no == 5:
            n_filts *= 2
        last_layer = _conv_maxout_layer(
            last_layer, n_filts, 'conv_{}'.format(layer_no))
    # collapse axis 2 by taking its maximum
    # (presumably the frequency axis, per the layer name -- TODO confirm)
    last_layer = Lambda(
        lambda layer: K.max(layer, axis=2),
        output_shape=(None, n_filts),
        name='max_freq_into_channel',
    )(last_layer)
    # dense layers
    for layer_no in range(1, 4):
        name_prefix = 'dense_{}'.format(layer_no)
        dense_a = Dense(
            self.config.num_dense_hidden, name=name_prefix + '_a',
            **_layer_kwargs()
        )
        dense_b = Dense(
            self.config.num_dense_hidden, name=name_prefix + '_b',
            **_layer_kwargs()
        )
        td_a = TimeDistributed(
            dense_a, name=name_prefix + '_td_a')(last_layer)
        td_b = TimeDistributed(
            dense_b, name=name_prefix + '_td_b')(last_layer)
        last_layer = Maximum(name=name_prefix + '_m')([td_a, td_b])
        last_layer = Dropout(
            train_config.dropout, name=name_prefix + '_d',
            seed=self._cur_weight_seed,
        )(last_layer)
        self._cur_weight_seed += 1
    # per-step output layer with one unit per label
    activation_dense = Dense(
        self.config.num_labels, name='dense_activation',
        **_layer_kwargs()
    )
    activation_layer = TimeDistributed(
        activation_dense, name='dense_activation_td')(last_layer)
    # we take a page from the image_ocr example and treat the ctc as a
    # lambda layer.
    loss_layer = Lambda(
        lambda args: _ctc_loss(*args),
        output_shape=(1,), name='ctc_loss'
    )([
        label_input,
        activation_layer,
        feat_size_input,
        label_size_input
    ])
    # the model's single output is the loss layer itself
    self.model = Model(
        inputs=[
            feat_input,
            label_input,
            feat_size_input,
            label_size_input,
        ],
        outputs=[loss_layer],
    )
def spread(y_true, y_pred):
    """Metric: global minimum of ``y_pred`` minus its global maximum.

    The result is therefore never positive. ``y_true`` is unused.
    """
    lowest = K.min(y_pred)
    highest = K.max(y_pred)
    return lowest - highest
def k_max(y_true, y_pred):
    """Metric: global maximum of ``y_pred``. ``y_true`` is unused."""
    largest = K.max(y_pred)
    return largest
def black_box(self):
    """Assemble and compile the network, store it on ``self.model``, return it.

    The model has three inputs (diagnosis codes, patient features, hospital
    features). Diagnosis codes are embedded, reduced over axis 1 with
    max / sum / min, joined with the embedding at position 0, and passed
    through a dense stack. That result is concatenated with the hospital and
    patient inputs, passed through a second dense stack and a final dense
    layer named ``"output"``.

    Returns:
        The compiled model (also assigned to ``self.model``).
    """
    vocab_size, vector_width = self.embeddings.shape

    # --- inputs ---------------------------------------------------------
    diag_in = Input(shape=(self.diag_shape, ))
    patient_in = Input(shape=(self.patient_shape, ))
    hosp_in = Input(shape=(self.hosp_shape, ))

    # --- embedding, initialised from self.embeddings and left trainable --
    embed = Embedding(
        vocab_size,
        vector_width,
        weights=[self.embeddings],
        input_length=self.diag_shape,
        trainable=True,
    )
    code_vectors = embed(diag_in)

    # --- deep-set style pooling -----------------------------------------
    # Embedding at position 0 (the original comment calls it the primary
    # disease).
    primary_vector = Lambda(lambda x: x[:, 0, :],
                            name="Lambda_" + str(0))(code_vectors)

    # Reductions over axis 1; each turns (batch, codes, dim) into
    # (batch, dim), as the output_shape lambdas state.
    sum_over_codes = Lambda(lambda x: K.sum(x, axis=1),
                            output_shape=(lambda shape: (shape[0], shape[2])))
    max_over_codes = Lambda(lambda x: K.max(x, axis=1),
                            output_shape=(lambda shape: (shape[0], shape[2])))
    min_over_codes = Lambda(lambda x: K.min(x, axis=1),
                            output_shape=(lambda shape: (shape[0], shape[2])))

    code_min = min_over_codes(code_vectors)
    code_max = max_over_codes(code_vectors)
    code_sum = sum_over_codes(code_vectors)

    diag_features = Concatenate()(
        [code_max, code_sum, code_min, primary_vector])

    # --- diagnosis stack: dropout after every layer except the last ------
    for _ in range(self.layers_diag - 1):
        diag_features = Dense(self.nodes_diag,
                              activation=self.act_diag)(diag_features)
        diag_features = Dropout(self.dropout)(diag_features)
    deepset = Dense(self.nodes_diag,
                    activation=self.act_diag)(diag_features)

    # --- combine with hospital and patient inputs -----------------------
    combined = Concatenate()([hosp_in, patient_in, deepset])
    for _ in range(self.layers_patient):
        combined = Dense(self.nodes_patient,
                         activation=self.act_patient)(combined)
        combined = Dropout(self.dropout)(combined)

    # --- output layer ---------------------------------------------------
    prediction = Dense(
        self.nodes_out,
        activation=self.act_out,
        bias_initializer=self.bias_init,
        kernel_regularizer=self.l2_reg,
        name="output",
    )(combined)

    self.model = Model(inputs=[diag_in, patient_in, hosp_in],
                       outputs=prediction)
    self.model.compile(optimizer=self.optimizer, loss=self.loss)
    return self.model
def SSD_1_LOSS(self, yGT, yPred):
    """Compute the SSD loss for one sample: ``5 * class_loss + box_loss``.

    Args:
        yGT: Flat ground-truth tensor. It is reshaped to ``[20, 5]`` rows
            laid out as ``(class, 4 box values)`` and then filtered by
            ``self.get_y`` (per the original note, only non-zero boxes are
            kept).
        yPred: Flat prediction tensor. The first
            ``nb_activation * nb_class`` values are class scores; the rest
            are 4 box values per activation.

    Returns:
        Scalar tensor ``clas_loss * 5 + loc_loss``.
    """
    # Ground truth has a fixed size: restore matrix form, then keep only the
    # rows self.get_y selects.
    yGT = K.reshape(yGT, [20, 5])
    yGT = self.get_y(yGT)

    # Split ground truth into boxes and class ids.
    bbox_GT = yGT[:, 1:5]
    class_GT = yGT[:, 0]

    # Split the prediction into class scores and box values.
    split_at = self.nb_activation * self.nb_class
    class_Pred = K.reshape(yPred[:split_at],
                           [self.nb_activation, self.nb_class])
    bbox_Pred = K.reshape(yPred[split_at:], [self.nb_activation, 4])

    # Convert raw box activations to boxes.
    a_ic = self.actn_to_bb(bbox_Pred)

    # Overlap matrix between ground-truth boxes and anchors.
    overlaps = self.jaccard(bbox_GT, self.anchor_cnr)

    # For each activation: index of the best-overlapping GT, and that
    # best overlap VALUE (not an index).
    gt_idx = K.argmax(overlaps, 0)
    gt_overlap = K.max(overlaps, 0)
    # For each GT: index of its best anchor.
    prior_idx = K.argmax(overlaps, 1)

    # Force every GT's best anchor above the threshold by adding 1 to its
    # overlap.
    ADD = K.tf.one_hot(prior_idx, self.nb_activation)
    ADD = K.cast(K.sum(ADD, axis=0), ('float64'))
    gt_overlap = gt_overlap + ADD

    Threshold = 0.4
    # The same anchor mask drives both the box loss and the class loss, so
    # it is computed once.
    valid_anchor = gt_overlap > Threshold
    anchor_mask = K.cast(valid_anchor, ('float32'))

    # ---- box loss: L1 distance on valid anchors, averaged over them ----
    bbox = K.gather(bbox_GT, gt_idx)
    bbox = K.cast(bbox, ('float32'))
    loc_loss = K.abs(a_ic - bbox)
    loc_loss = K.sum(loc_loss, axis=1)
    loc_loss = K.tf.multiply(loc_loss, anchor_mask)
    loc_loss = (K.sum(loc_loss)) / K.sum(anchor_mask)

    # ---- classification loss -------------------------------------------
    # One-hot target per activation. The depth must be self.nb_class so it
    # lines up with the mask and with One_Hot_mandatory below (it used to
    # be a hard-coded 20).
    sel_gt_clas = K.gather(class_GT, gt_idx)
    gt_class_per_activation = K.one_hot(K.cast(sel_gt_clas, ('int32')),
                                        self.nb_class)

    # Anchors below the threshold get an all-zero target.
    class_mask = K.reshape(
        K.repeat_elements(anchor_mask, self.nb_class, 0),
        (self.nb_activation, self.nb_class))
    One_Hot_Overlap = gt_class_per_activation * class_mask

    # Each GT's best anchor is additionally scored against the GT class.
    pred_mandatory_anchor = K.gather(class_Pred, prior_idx)
    One_Hot_mandatory = K.tf.one_hot(K.cast(class_GT, 'int32'),
                                     self.nb_class)

    target = K.concatenate([One_Hot_Overlap, One_Hot_mandatory], axis=0)
    pred = K.concatenate([class_Pred, pred_mandatory_anchor], axis=0)
    clas_loss = K.mean(K.binary_crossentropy(target, pred))

    return clas_loss * 5 + loc_loss
def _construct_acoustic_model(self):
    """Build the acoustic model and store its pieces on ``self``.

    Sets ``_audio_input``, ``_audio_size_input``, ``_label_input``,
    ``_label_size_input``, ``_activation_layer``, ``_loss_layer`` and
    ``_acoustic_model``. The network is ten maxout convolution blocks, a
    max over axis 2, three time-distributed maxout dense blocks, a
    time-distributed dense layer of ``self._num_labels`` units, and a Lambda
    layer that evaluates ``_ctc_loss``.
    """

    def _maxout_conv(inputs, num_filters, prefix, dropout=True):
        # Two parallel convolutions combined by element-wise maximum.
        branches = [
            Conv2D(num_filters,
                   self._filt_size,
                   padding='same',
                   name=prefix + suffix,
                   **self._layer_kwargs)(inputs)
            for suffix in ('_a', '_b')
        ]
        merged = Maximum(name=prefix + '_m')(branches)
        # Dropout goes after the max (i.e. before the next weights), as per
        # http://jmlr.org/proceedings/papers/v28/goodfellow13.pdf
        if not dropout:
            return merged
        return Dropout(self._dropout_p, name=prefix + '_d')(merged)

    def _maxout_dense(inputs, prefix):
        # Two time-distributed dense layers combined by maximum, then dropout.
        dense_a = Dense(self._dense_size,
                        name=prefix + '_a',
                        **self._layer_kwargs)
        dense_b = Dense(self._dense_size,
                        name=prefix + '_b',
                        **self._layer_kwargs)
        td_a = TimeDistributed(dense_a, name=prefix + '_td_a')(inputs)
        td_b = TimeDistributed(dense_b, name=prefix + '_td_b')(inputs)
        merged = Maximum(name=prefix + '_m')([td_a, td_b])
        return Dropout(self._dropout_p, name=prefix + '_d')(merged)

    n_filts = self._initial_filts

    # ---- inputs ---------------------------------------------------------
    audio_shape = [self._input_shape[0], self._input_shape[1], 1]
    if self._deltas:
        # Deltas widen either axis 1 (concatenated) or axis 2 (otherwise).
        grown_axis = 1 if self._deltas.concatenate else 2
        audio_shape[grown_axis] *= self._deltas.num_deltas + 1

    self._audio_input = Input(shape=audio_shape, name='audio_in')
    self._audio_size_input = Input(shape=(1, ), name='audio_size_in')
    self._label_input = Input(shape=(None, ), name='label_in')
    self._label_size_input = Input(shape=(1, ), name='label_size_in')

    # ---- convolutional layers ------------------------------------------
    # The first block pools before its dropout, so dropout is applied here
    # rather than inside the helper.
    net = _maxout_conv(self._audio_input, n_filts, 'conv_1', dropout=False)
    net = MaxPooling2D(pool_size=self._pool_size, name='conv_1_p')(net)
    net = Dropout(self._dropout_p, name='conv_1_d')(net)

    for layer_no in range(2, 11):
        if layer_no == 5:
            # Filter count doubles from the fifth block onwards.
            n_filts *= 2
        net = _maxout_conv(net, n_filts, 'conv_{}'.format(layer_no))

    # Collapse axis 2 by taking its maximum.
    net = Lambda(
        lambda layer: K.max(layer, axis=2),
        output_shape=(self._input_shape[0], n_filts),
        name='max_freq_into_channel',
    )(net)

    # ---- dense layers ---------------------------------------------------
    for layer_no in range(1, 4):
        net = _maxout_dense(net, 'dense_{}'.format(layer_no))

    activation_dense = Dense(self._num_labels,
                             name='dense_activation',
                             **self._layer_kwargs)
    self._activation_layer = TimeDistributed(
        activation_dense, name='dense_activation_td')(net)

    # We take a page from the image_ocr example and treat the CTC loss as a
    # lambda layer.
    ctc_inputs = [
        self._label_input,
        self._activation_layer,
        self._audio_size_input,
        self._label_size_input,
    ]
    self._loss_layer = Lambda(lambda args: _ctc_loss(*args),
                              output_shape=(1, ),
                              name='ctc_loss')(ctc_inputs)

    self._acoustic_model = Model(
        inputs=[
            self._audio_input,
            self._label_input,
            self._audio_size_input,
            self._label_size_input,
        ],
        outputs=[self._loss_layer],
    )
def grayInverse(x):
    """Return ``max(x) - x`` where the maximum is taken over axes 2 and 3.

    ``keepdims=True`` keeps the reduced axes so the maximum broadcasts back
    against ``x``.
    """
    peak = K.max(x, axis=(2, 3), keepdims=True)
    inverted = peak - x
    return inverted
def construct_graph(self, embedding_matrix=None):
    """Build the recurrent-convolutional classifier and store it on ``self.model``.

    Pipeline: input -> embedding -> forward and backward recurrent layers ->
    concatenate ``[left context, embedding, right context]`` -> max over
    axis 1 -> dropout -> dense -> softmax output.

    Args:
        embedding_matrix: Initial embedding weights. Required when
            ``self.use_external_embedding`` is truthy; ignored otherwise.

    Raises:
        ValueError: If ``self.use_external_embedding`` is truthy but no
            ``embedding_matrix`` was supplied.
    """
    # 1. input layer
    input_layer = Input(shape=(self.max_sequence_length, ))

    # 2. embedding layer, optionally initialised from a pretrained matrix
    embedding_kwargs = {
        'mask_zero': True,
        'input_length': self.max_sequence_length,
        'trainable': self.embedding_trainable,
    }
    if self.use_external_embedding:
        if embedding_matrix is None:
            raise ValueError(
                'embedding_matrix is required when use_external_embedding '
                'is set')
        embedding_kwargs['weights'] = [embedding_matrix]
    embedding_layer = Embedding(self.vocab_size, self.embedding_dim,
                                **embedding_kwargs)(input_layer)

    # 3. bidirectional recurrent layers. Pick the cell class first so that
    # only the layers actually used are created; anything unrecognised
    # falls back to LSTM.
    if self.rnn_cell_type == 'GRU':
        rnn_cls = GRU
    elif self.rnn_cell_type == 'CuDNNLSTM':
        rnn_cls = CuDNNLSTM
    elif self.rnn_cell_type == 'CuDNNGRU':
        rnn_cls = CuDNNGRU
    else:
        rnn_cls = LSTM

    bi_rnn_layer_left = rnn_cls(self.rnn_cell_size,
                                return_sequences=True)(embedding_layer)
    bi_rnn_layer_right = rnn_cls(self.rnn_cell_size,
                                 return_sequences=True,
                                 go_backwards=True)(embedding_layer)

    # The go_backwards layer emits its sequence reversed; flip it back so
    # both directions are aligned step by step.
    bi_rnn_layer_right = Lambda(lambda x: backend.reverse(x, axes=1))(
        bi_rnn_layer_right)

    # 4. combine both contexts with the word embedding itself
    concatenate_layer = Concatenate()(
        [bi_rnn_layer_left, embedding_layer, bi_rnn_layer_right])

    # 5. max-pool over axis 1 to keep the strongest feature per channel.
    # Use the backend reduction explicitly: an unqualified max() does not
    # accept an ``axis`` argument if it resolves to the Python builtin.
    max_pool = Lambda(lambda x: backend.max(x, axis=1))
    maxpool = max_pool(concatenate_layer)

    # 6. dropout
    dropout_layer = Dropout(self.dropout_rate)(maxpool)

    # 7. dense layer and softmax output
    dense_layer = Dense(self.dense_size,
                        activation=self.dense_activation)(dropout_layer)
    output_layer = Dense(self.label_num, activation='softmax')(dense_layer)

    self.model = Model(inputs=input_layer, outputs=output_layer)
def __call__(self, y_sing_pred):
    """Decode one raw prediction tensor into NMS-filtered detections.

    Steps: decode box centres and sizes, convert them to corner pairs,
    compute per-class scores and zero those not above
    ``self.detection_threshold``, flatten, run non-max suppression once per
    class, gather the surviving boxes / scores / classes and hand them to
    ``process_outs``.

    Args:
        y_sing_pred: Prediction tensor whose last axis holds
            ``(xy[2], wh[2], objectness[1], class logits[...])``.

    Returns:
        Whatever ``process_outs`` returns for the selected boxes, scores and
        float-cast class indices.
    """
    anchor_values = self.config["constants"]["anchors"]
    grid_size = self.config["model"]["grid_size"]

    anchors = np.reshape(anchor_values,
                         [1, 1, 1, len(anchor_values) // 2, 2])

    # Boxes are predicted in grid units; dividing by the grid size converts
    # them to image coordinates.
    raw_xy = y_sing_pred[..., 0:2]
    raw_wh = y_sing_pred[..., 2:4]
    b_xy = (K.sigmoid(raw_xy) + self.c_grid[0]) / grid_size
    b_wh = (K.exp(raw_wh) * anchors[0]) / grid_size

    half_wh = b_wh / 2.
    boxes = K.concatenate([b_xy - half_wh, b_xy + half_wh], axis=-1)

    # Per-class score = objectness * class probability; anything not above
    # the detection threshold is zeroed out.
    objectness = K.sigmoid(y_sing_pred[..., 4:5])
    class_probs = K.softmax(y_sing_pred[..., 5:])
    scores_all = objectness * class_probs
    keep = scores_all > self.detection_threshold
    scores_all = scores_all * K.cast(keep, np.float32)

    # Winning class and its score for every box.
    classes = K.argmax(scores_all, axis=-1)
    scores = K.max(scores_all, axis=-1)

    # Flattened length: grid cells times boxes per cell.
    S2B = grid_size * grid_size * len(anchor_values) // 2

    flatten_boxes = K.reshape(boxes, shape=(S2B, 4))
    flatten_scores = K.reshape(scores, shape=(S2B, ))
    flatten_classes = K.reshape(classes, shape=(S2B, ))

    def _nms_for_class(class_id):
        # Only boxes of this class with a score > 0 take part.
        class_mask = K.cast(K.equal(flatten_classes, class_id), np.float32)
        score_mask = K.cast(flatten_scores > 0, np.float32)
        mask = class_mask * score_mask
        return tf.image.non_max_suppression(
            flatten_boxes,
            flatten_scores * mask,
            max_output_size=self.max_boxes,
            iou_threshold=self.nms_threshold,
            score_threshold=0.)

    # Multiclass NMS: one pass per class, winners concatenated.
    inds = [_nms_for_class(c) for c in range(self.num_classes)]
    selected_indices = K.concatenate(inds, axis=-1)

    selected_boxes = K.gather(flatten_boxes, selected_indices)
    selected_scores = K.gather(flatten_scores, selected_indices)
    selected_classes = K.gather(flatten_classes, selected_indices)

    return process_outs(selected_boxes, selected_scores,
                        K.cast(selected_classes, np.float32))
def __init__(self,
             model,
             policy=None,
             test_policy=None,
             enable_double_dqn=False,
             enable_dueling_network=False,
             dueling_type='avg',
             *args,
             **kwargs):
    """Set up the agent around a single-output Q-network.

    Args:
        model: Keras model whose single output has shape
            ``(None, self.nb_actions)``.
        policy: Policy stored on ``self.policy``; ``EpsGreedyQPolicy()``
            when omitted.
        test_policy: Policy stored on ``self.test_policy``;
            ``GreedyQPolicy()`` when omitted.
        enable_double_dqn: Stored on the instance as given.
        enable_dueling_network: When true, the model's last layer is
            replaced by a dueling head built on the second-to-last layer.
        dueling_type: ``'avg'``, ``'max'`` or ``'naive'``; selects how the
            value and advantage streams are combined.
        *args, **kwargs: Forwarded to the parent constructor.

    Raises:
        ValueError: If the model has more than one output or its output
            shape is not ``(None, self.nb_actions)``.
    """
    super(DQNAgent, self).__init__(*args, **kwargs)

    # Validate (important) input.
    model_output = model.output
    if hasattr(model_output, '__len__') and len(model_output) > 1:
        raise ValueError(
            'Model "{}" has more than one output. DQN expects a model that has a single output.'
            .format(model))
    if model_output._keras_shape != (None, self.nb_actions):
        raise ValueError(
            'Model output "{}" has invalid shape. DQN expects a model that has one dimension for each action, in this case {}.'
            .format(model_output, self.nb_actions))

    # Parameters.
    self.enable_double_dqn = enable_double_dqn
    self.enable_dueling_network = enable_dueling_network
    self.dueling_type = dueling_type

    if self.enable_dueling_network:
        # Build on the second-to-last layer; the original last layer is
        # dropped.
        penultimate = model.layers[-2]
        nb_action = model_output._keras_shape[-1]

        # y has nb_action + 1 units:
        #   y[:, 0]  is V(s; theta)
        #   y[:, 1:] is A(s, a; theta)
        y = Dense(nb_action + 1, activation='linear')(penultimate.output)

        # Q(s, a; theta) per dueling_type:
        #   'avg'   : V + (A - mean_a(A))
        #   'max'   : V + (A - max_a(A))
        #   'naive' : V + A
        if self.dueling_type == 'avg':
            combine = lambda a: K.expand_dims(a[:, 0], -1) + a[:, 1:] - K.mean(
                a[:, 1:], axis=1, keepdims=True)
        elif self.dueling_type == 'max':
            combine = lambda a: K.expand_dims(a[:, 0], -1) + a[:, 1:] - K.max(
                a[:, 1:], axis=1, keepdims=True)
        elif self.dueling_type == 'naive':
            combine = lambda a: K.expand_dims(a[:, 0], -1) + a[:, 1:]
        else:
            assert False, "dueling_type must be one of {'avg','max','naive'}"

        outputlayer = Lambda(combine, output_shape=(nb_action, ))(y)
        model = Model(inputs=model.input, outputs=outputlayer)

    # Related objects.
    self.model = model
    self.policy = EpsGreedyQPolicy() if policy is None else policy
    self.test_policy = GreedyQPolicy() if test_policy is None else test_policy

    # State.
    self.reset_states()
def softmax(x, axis=-1):
    """Softmax of ``x`` along ``axis`` (default: last axis).

    The maximum along ``axis`` is subtracted before exponentiating; the
    exponentials are then divided by their sum along the same axis.
    """
    shifted = x - K.max(x, axis=axis, keepdims=True)
    exps = K.exp(shifted)
    total = K.sum(exps, axis=axis, keepdims=True)
    return exps / total
def categorical_hinge(y_true, y_pred):
    """Hinge loss ``max(0, neg - pos + 1)`` along the last axis.

    ``pos`` is the sum of ``y_true * y_pred``; ``neg`` is the maximum of
    ``(1 - y_true) * y_pred``.
    """
    target_score = K.sum(y_true * y_pred, axis=-1)
    off_target = (1. - y_true) * y_pred
    best_other = K.max(off_target, axis=-1)
    margin = best_other - target_score + 1.
    return K.maximum(0., margin)
# represents our first word vector rather than l_Q[:, 0]. We can think of the weight # matrix (W_c) as being similarly transposed such that each kernel is a column # of W_c. Therefore, h_Q = tanh(l_Q • W_c + b_c) with l_Q, W_c, and b_c being # the transposes of the matrices described in the paper. query_conv = Convolution1D(K, FILTER_LENGTH, padding="same", input_shape=(None, WORD_DEPTH), activation="tanh")(query) # See equation (2). # Next, we apply a max-pooling layer to the convolved query matrix. Keras provides # its own max-pooling layers, but they cannot handle variable length input (as # far as I can tell). As a result, I define my own max-pooling layer here. In the # paper, the operation selects the maximum value for each row of h_Q, but, because # we're using the transpose, we're selecting the maximum value for each column. query_max = Lambda(lambda x: backend.max(x, axis=1), output_shape=(K, ))(query_conv) # See section 3.4. # In this step, we generate the semantic vector represenation of the query. This # is a standard neural network dense layer, i.e., y = tanh(W_s • v + b_s). query_sem = Dense(L, activation="tanh", input_dim=K)(query_max) # See section 3.5. # The document equivalent of the above query model. doc_conv = Convolution1D(K, FILTER_LENGTH, padding="same", input_shape=(None, WORD_DEPTH), activation="tanh") doc_max = Lambda(lambda x: backend.max(x, axis=1), output_shape=(K, )) doc_sem = Dense(L, activation="tanh", input_dim=K)
def softmax_over_time(x):
    """Softmax of ``x`` along axis 1.

    Asserts that ``x`` has more than two dimensions. The maximum along
    axis 1 is subtracted before exponentiating.
    """
    assert (K.ndim(x) > 2)
    peak = K.max(x, axis=1, keepdims=True)
    exps = K.exp(x - peak)
    total = K.sum(exps, axis=1, keepdims=True)
    return exps / total
# Split columns 0 and 1 of the train / test arrays into separate per-question
# arrays.
Q1_train = X_train[:, 0]
Q2_train = X_train[:, 1]
Q1_test = X_test[:, 0]
Q2_test = X_test[:, 1]

# Define the model
# Two inputs of length MAX_SEQUENCE_LENGTH, one per question.
question1 = Input(shape=(MAX_SEQUENCE_LENGTH, ))
question2 = Input(shape=(MAX_SEQUENCE_LENGTH, ))

# Question 1 branch: frozen embedding initialised from word_embedding_matrix,
# a per-timestep ReLU dense layer, then a max over axis 1 that leaves one
# EMBEDDING_DIM-wide vector per sample.
q1 = Embedding(nb_words + 1, EMBEDDING_DIM, weights=[word_embedding_matrix], input_length=MAX_SEQUENCE_LENGTH, trainable=False)(question1)
q1 = TimeDistributed(Dense(EMBEDDING_DIM, activation='relu'))(q1)
q1 = Lambda(lambda x: K.max(x, axis=1), output_shape=(EMBEDDING_DIM, ))(q1)

# Question 2 branch: same structure, built from its own layer instances
# (nothing is shared with the question 1 branch).
q2 = Embedding(nb_words + 1, EMBEDDING_DIM, weights=[word_embedding_matrix], input_length=MAX_SEQUENCE_LENGTH, trainable=False)(question2)
q2 = TimeDistributed(Dense(EMBEDDING_DIM, activation='relu'))(q2)
q2 = Lambda(lambda x: K.max(x, axis=1), output_shape=(EMBEDDING_DIM, ))(q2)

# Join both question vectors and run two Dense(200) blocks; dropout is
# applied after each, with batch normalisation between the two blocks.
merged = concatenate([q1, q2])
merged = Dense(200, activation='relu')(merged)
merged = Dropout(DROPOUT)(merged)
merged = BatchNormalization()(merged)
merged = Dense(200, activation='relu')(merged)
merged = Dropout(DROPOUT)(merged)
def mean_squared_error_p(y_true, y_pred):
    """Clipped worst-case squared error (an L_inf-style variant of MSE).

    Takes the largest squared difference along the last axis, subtracts 1,
    and clips the result to ``[0, 100]``.
    """
    squared_diff = K.square(y_pred - y_true)
    worst = K.max(squared_diff, axis=-1)
    return K.clip(worst - 1, 0., 100.)