def build_global_attention_pooling_model_cascade_attention(base_network, class_num):
    height, width, depth = base_network[0].output_shape[1:]

    feature_map_step_1 = base_network[0].output
    S = Convolution2D(class_num, (1, 1), name='conv_class')(feature_map_step_1)
    A = GlobalAveragePooling2D()(S)
    y_old = KL.Softmax(name='output_1')(A)
    M, M_loss, S_and_loss = spatial_mask_generate(S, y_old, height, width,
                                                  mask_max=1. / 2, mask_min=1. / 4)

    feature_map_step_2 = base_network[1].output
    S_new = Convolution2D(class_num, (1, 1), name='conv_class_filtered')(feature_map_step_2)
    A_new = GlobalAttentionPooling2D()([S_new, M])
    y_new = KL.Softmax(name='output_2')(A_new)

    r_loss = KL.Lambda(lambda t: rank_transform(t), name='Rank_loss')([y_old, y_new])
    cns_loss = KL.Lambda(lambda t: t, name='Cross_network_similarity_loss')(r_loss)

    x = KL.concatenate([feature_map_step_1, feature_map_step_2])
    x = GlobalAveragePooling2D()(x)
    x = Dense(1024, activation='relu')(x)
    x = Dense(class_num)(x)
    y_all = KL.Softmax(name='output_3')(x)

    A_final = KL.Lambda(lambda t: entropy_add(t))([x, A_new, y_all, y_new])
    # output_5 is the final output
    y = KL.Softmax(name='output_5')(A_final)
    r2_loss = KL.Lambda(lambda t: rank_transform(t), name='Rank_2_loss')([y_old, y])
    r3_loss = KL.Lambda(lambda t: rank_transform(t), name='Rank_3_loss')([y_new, y])

    for layer in base_network[1].layers:
        layer.name += '_2'

    model = Model(inputs=[base_network[0].input, base_network[1].input],
                  outputs=[y_old, y_new, y_all, y, M_loss, S_and_loss,
                           r_loss, cns_loss, r2_loss, r3_loss])
    model.summary()
    return model
def _network2():
    optimizer = optimizers.Adadelta(lr=2)
    model = Sequential()
    model.add(layers.Dense(units=150, activation='tanh',
                           input_shape=(NO_FEATURES, )))
    model.add(layers.Dropout(0.5))
    model.add(layers.Dense(units=100, activation='relu'))
    model.add(layers.Dropout(0.5))
    model.add(layers.Dense(units=3, activation='tanh'))
    model.add(layers.Softmax())
    model.compile(loss='mse', optimizer=optimizer, metrics=['accuracy'])

    [favorites_dataset, retweets_dl] = _transform_to_dataloader()
    x_train, y_train = favorites_dataset[0]
    x_validation, y_validation = favorites_dataset[1]
    # x_train = np.expand_dims(x_train, axis=1)
    y_train = _tranform_y_data(y_train)
    # x_validation = np.expand_dims(x_validation, axis=1)
    y_validation = _tranform_y_data(y_validation)

    x = model.fit(x_train, y_train, validation_split=0.3, epochs=EPOCHS,
                  batch_size=BATCH_SIZE, verbose=1, shuffle=True,
                  callbacks=[es])
def snl(x):
    """Simplified non-local block (GCNet).

    GCNet observed that in NLNet the global context computed at each image
    position is nearly identical, so it computes a single shared global
    attention map instead, cutting the cost of the context computation to
    1/(H*W) of NLNet's.

    :param x: input layer or tensor
    """
    bs, h, w, c = x.get_shape().as_list()
    input_x = x
    input_x = layers.Reshape((h * w, c))(input_x)  # [bs, H*W, C]
    # input_x = layers.Lambda(lambda x: tf.transpose(x, perm=[0, 2, 1]))(input_x)  # [bs, C, H*W]
    # input_x = layers.Lambda(lambda x: tf.expand_dims(x, axis=1))(input_x)  # [bs, 1, C, H*W]

    context_mask = layers.Conv2D(filters=1, kernel_size=(1, 1))(x)  # [bs, H, W, 1]
    context_mask = layers.Reshape((h * w, 1))(context_mask)
    context_mask = layers.Softmax(axis=1)(context_mask)  # [bs, H*W, 1]
    # context_mask = layers.Lambda(lambda x: tf.transpose(x, [0, 2, 1]))(context_mask)
    # context_mask = layers.Lambda(lambda x: tf.expand_dims(x, axis=-1))(context_mask)

    context = layers.dot([input_x, context_mask], axes=1)  # [bs, C, 1]
    context = layers.Reshape((1, 1, c))(context)

    # context_transform = layers.Conv2D(c, (1, 1))(context)
    # context_transform = LayerNormalization()(context_transform)
    # context_transform = layers.ReLU()(context_transform)
    # context_transform = layers.Conv2D(c, (1, 1))(context_transform)
    context_transform = layers.Conv2D(c, kernel_size=(1, 1))(context)
    # Fuse the global context back into the input; this Add was commented out
    # in the original, which made the block return its input unchanged.
    x = layers.Add()([x, context_transform])
    return x
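# Quick shape check for the snl block above — a minimal sketch assuming
# TensorFlow 2.x with `layers`/`models` importable from tensorflow.keras.
# The spatial dimensions must be known at graph-build time because of the
# Reshape calls inside snl.
from tensorflow.keras import layers, models

inp = layers.Input(shape=(32, 32, 64))
out = snl(inp)
smoke_model = models.Model(inp, out)
print(smoke_model.output_shape)  # expected: (None, 32, 32, 64)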
def create_model(input_shape: tuple, nb_classes: int,
                 init_with_imagenet: bool = False,
                 learning_rate: float = 0.001):
    weights = None
    if init_with_imagenet:
        weights = 'imagenet'

    model = VGG16(input_shape=input_shape,
                  classes=nb_classes,
                  weights=weights,
                  include_top=False)
    # "Shallow" VGG for Cifar10
    x = model.get_layer('block3_pool').output
    x = layers.Flatten(name='Flatten')(x)
    # init = initializers.RandomUniform(minval=-0.05, maxval=0.05, seed=660)
    x = layers.Dense(512, activation='relu')(x)
    x = layers.Dropout(.2)(x)
    x = layers.Dense(nb_classes)(x)
    x = layers.Softmax()(x)
    model = models.Model(model.input, x)

    loss = losses.categorical_crossentropy
    optimizer = optimizers.Adam(lr=learning_rate, beta_1=0.9, beta_2=0.999,
                                epsilon=1e-08, decay=0.99)
    model.compile(optimizer, loss, metrics=["accuracy"])
    return model
def __call__(self):
    inp = layers.Input((None, None, self.n_channels))
    map_size = tf.shape(inp)[1:3]
    shared = layers.Conv2D(
        self.n_channels, 3, activation='relu', padding='SAME',
        kernel_initializer=initializers.random_normal(stddev=0.01),
        kernel_regularizer=regularizers.l2(1.0),
        bias_regularizer=regularizers.l2(2.0))(inp)

    logits = layers.Conv2D(
        self.n_anchors * 2, 1,
        kernel_initializer=initializers.random_normal(stddev=0.01),
        kernel_regularizer=regularizers.l2(1.0),
        bias_regularizer=regularizers.l2(2.0))(shared)
    logits = layers.Reshape((-1, 2))(logits)
    score = layers.Softmax()(logits)

    delta = layers.Conv2D(
        self.n_anchors * 4, 1,
        kernel_initializer=initializers.random_normal(stddev=0.01),
        kernel_regularizer=regularizers.l2(1.0),
        bias_regularizer=regularizers.l2(2.0))(shared)
    delta = layers.Reshape((-1, 4))(delta)

    model = models.Model(inp, [logits, delta, score])
    return model
def __init__(self, input_shape, num_classes):
    conv2d_32 = layers.Conv2D(32, kernel_size=(3, 3))
    conv2d_64 = layers.Conv2D(64, kernel_size=(3, 3))
    max_pool = layers.MaxPool2D(pool_size=(2, 2))
    dropout_025 = layers.Dropout(0.25)
    flatten = layers.Flatten()
    dense_128 = layers.Dense(128)
    dropout_050 = layers.Dropout(0.5)
    # The original also defined Activation('relu')/Activation('softmax') here,
    # but they were immediately overwritten by these equivalent layers.
    relu = layers.ReLU()
    softmax = layers.Softmax()
    dense_out = layers.Dense(num_classes)

    x = layers.Input(shape=input_shape)
    h1 = relu(conv2d_32(x))
    h2 = relu(conv2d_64(h1))
    h3 = max_pool(h2)
    h3_dropout = dropout_025(h3)
    h4 = flatten(h3_dropout)
    h5 = relu(dense_128(h4))
    h5_dropout = dropout_050(h5)
    y = softmax(dense_out(h5_dropout))

    super().__init__(x, y)
    self.compile(loss='categorical_crossentropy', optimizer='adam',
                 metrics=['accuracy'])
def _resnet(block, blocks_num, im_width=224, im_height=224, num_classes=2,
            include_top=True):
    # TensorFlow tensors use NHWC channel ordering.
    # (None, 224, 224, 3)
    input_image = layers.Input(shape=(im_height, im_width, 3), dtype="float32")
    x = layers.Conv2D(filters=64, kernel_size=7, strides=2, padding="SAME",
                      use_bias=False, name="conv1")(input_image)
    x = layers.BatchNormalization(momentum=0.9, epsilon=1e-5,
                                  name="conv1/BatchNorm")(x)
    x = layers.ReLU()(x)
    x = layers.MaxPool2D(pool_size=3, strides=2, padding="SAME")(x)

    x = _make_layer(block, x.shape[-1], 64, blocks_num[0], name="block1")(x)
    x = _make_layer(block, x.shape[-1], 128, blocks_num[1], strides=2, name="block2")(x)
    x = _make_layer(block, x.shape[-1], 256, blocks_num[2], strides=2, name="block3")(x)
    x = _make_layer(block, x.shape[-1], 512, blocks_num[3], strides=2, name="block4")(x)

    if include_top:
        x = layers.GlobalAvgPool2D()(x)  # pool + flatten
        x = layers.Dense(num_classes, name="logits")(x)
        predict = layers.Softmax()(x)
    else:
        predict = x

    model = Model(inputs=input_image, outputs=predict)
    return model
def create_model(input_shape: tuple, nb_classes: int,
                 init_with_imagenet: bool = False,
                 learning_rate: float = 0.01):
    weights = None
    if init_with_imagenet:
        weights = "imagenet"

    model = VGG16(input_shape=input_shape,
                  classes=nb_classes,
                  weights=weights,
                  include_top=False)
    # "Shallow" VGG for Cifar10
    x = model.get_layer('block3_pool').output
    x = layers.Flatten(name='Flatten')(x)
    x = layers.Dense(512, activation='relu')(x)
    x = layers.Dense(nb_classes)(x)
    x = layers.Softmax()(x)
    model = models.Model(model.input, x)

    loss = losses.categorical_crossentropy
    optimizer = optimizers.SGD(lr=learning_rate, decay=0.99)
    model.compile(optimizer, loss, metrics=["accuracy"])
    return model
def D3GenerateModel(n_filter=16, number_of_class=1, input_shape=(16, 144, 144, 1),
                    activation_last='softmax',
                    metrics=['mse', 'acc', dice_coef, recall_at_thresholds,
                             precision_at_thresholds],
                    loss='categorical_crossentropy', lr=1e-3, dropout=0.05,
                    init='glorot_uniform', two_output=False):
    # NOTE: `lr` was referenced but never defined in the original; it is
    # exposed here as a parameter with an assumed default.
    # init = initializers.VarianceScaling(scale=1.0, mode='fan_in', distribution='normal', seed=None)
    filter_size = n_filter
    input_x = layers.Input(shape=input_shape, name='Input_layer', dtype='float32')

    # level 1
    x = layers.Conv3D(filters=filter_size, kernel_size=(5, 5, 5), strides=(1, 1, 1),
                      kernel_initializer=init, padding='same')(input_x)
    x = cyclical_learning_rate.SineReLU()(x)
    x = layers.Conv3D(filters=filter_size, kernel_size=(5, 5, 5), strides=(1, 1, 1),
                      padding='same', kernel_initializer=init)(x)
    x = cyclical_learning_rate.SineReLU()(x)
    x = layers.MaxPooling3D(pool_size=(2, 2, 2), padding='same')(x)

    # level 2
    conv_list = []
    counter = 0
    x = layers.Conv3D(filters=filter_size * 2, kernel_size=(3, 3, 3), strides=(1, 1, 1),
                      padding='same', kernel_initializer=init)(x)
    x = cyclical_learning_rate.SineReLU()(x)
    x = layers.Conv3D(filters=filter_size * 2, kernel_size=(3, 3, 3), strides=(1, 1, 1),
                      padding='same', kernel_initializer=init)(x)
    x = cyclical_learning_rate.SineReLU()(x)
    x = layers.AveragePooling3D(pool_size=(1, 2, 2), padding='same')(x)
    x = layers.UpSampling3D(size=(1, 2, 2))(x)

    for index, kernel_sizes in enumerate([
            [(1, 3, 3), (3, 3, 1)],            # Changed [(1,3,3), (1,1,3)]
            [(3, 3, 3), (3, 1, 3)],            # Changed [(3,3,3), (3,1,3)]
            [(3, 3, 1), (3, 3, 3), (1, 3, 3)]  # Changed [(3,3,1), (1,3,1)]
    ]):
        for kernel_size in kernel_sizes:
            x = layers.Conv3D(filters=filter_size * 4, kernel_size=kernel_size,
                              kernel_initializer=init, strides=(1, 1, 1),
                              padding='same', name='Conv3D_%s' % counter)(x)
            x = layers.BatchNormalization()(x)
            x = cyclical_learning_rate.SineReLU()(x)
            counter = counter + 1
        conv_list.append(x)

    x = layers.concatenate(conv_list)
    x = layers.Conv3D(filters=filter_size * 8, kernel_size=(3, 3, 3), strides=(2, 2, 2),
                      kernel_initializer=init, padding='same')(x)
    x = layers.BatchNormalization()(x)
    x = cyclical_learning_rate.SineReLU()(x)
    # x = layers.MaxPooling3D(pool_size=(2,2,2))(x)
    x = layers.Reshape(target_shape=[4, -1, filter_size * 8])(x)
    x = layers.Conv2D(filters=filter_size * 8, kernel_size=(1, 1296),
                      kernel_initializer=init, strides=(1, 1296))(x)
    x = layers.BatchNormalization()(x)
    x = cyclical_learning_rate.SineReLU()(x)
    x = layers.Reshape(target_shape=[filter_size * 8, -1])(x)
    x = layers.Conv1D(filters=2, kernel_size=filter_size * 8,
                      strides=filter_size * 8, kernel_initializer=init)(x)
    x = layers.Softmax()(x)
    y = layers.Flatten()(x)

    # Classification
    model = Model(inputs=input_x, outputs=y)
    import yogi
    optimizer = yogi.Yogi(lr=lr)
    model.compile(optimizer=optimizer, loss=loss, metrics=metrics)
    return model
def pointnet2_cls_ssg(num_class, num_points, num_dim=3):
    '''
    input: BxNx3
    output: Bxnum_class
    '''
    input = keras.Input((num_points, num_dim))  # (batch, num_points, num_dim)
    inp = input
    if num_dim > 3:
        l0_xyz = crop(2, 0, 3)(input)
        l0_points = crop(2, 3, num_dim)(input)
        use_feature = True
    else:
        l0_xyz = input
        l0_points = input  # unused: the first stage has no high-level features, only coordinates
        use_feature = False

    l1_xyz, l1_points, _ = pointnet_sa_module(l0_xyz, l0_points, n_centroid=512,
                                              radius=0.2, n_samples=32,
                                              mlp=[64, 64, 128], bn=True,
                                              relu6=False, use_xyz=True,
                                              use_feature=use_feature,
                                              random_sample=False)
    l2_xyz, l2_points, _ = pointnet_sa_module(l1_xyz, l1_points, n_centroid=128,
                                              radius=0.4, n_samples=64,
                                              mlp=[128, 128, 256], bn=True,
                                              relu6=False, use_xyz=True,
                                              use_feature=True,
                                              random_sample=False)
    '''
    l3_xyz, l3_points, _ = pointnet_sa_module(l2_xyz, l2_points, n_centroid=32,
                                              radius=0.6, n_samples=32,
                                              mlp=[256, 512, 1024], bn=True,
                                              relu6=False, use_xyz=True,
                                              use_feature=True)
    x = layers.GlobalMaxPooling1D()(l3_points)
    # At this stage there is no sampling or grouping; the PointNet layer would
    # be used directly, but Keras doesn't support None as an input or output,
    # so the original implementation doesn't work here.
    '''
    # try this instead
    x = l2_points
    x = layers.Reshape((-1, 1, 256))(x)
    x = mlp_layers(x, [256, 512, 1024])
    x = layers.GlobalMaxPooling2D()(x)

    # fully connected layers
    # x = layers.Flatten()(x)  # (Batch, :)
    x = fully_connected(x, 512, bn=True, relu6=False, activation=True)
    x = layers.Dropout(0.5)(x)
    x = fully_connected(x, 256, bn=True, relu6=False, activation=True)
    x = layers.Dropout(0.5)(x)
    x = fully_connected(x, num_class, bn=False, activation=False)  # no BN or ReLU here
    x = layers.Softmax()(x)
    return keras.models.Model(inputs=inp, outputs=x)
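# Hedged usage sketch for pointnet2_cls_ssg. The helpers it calls (crop,
# pointnet_sa_module, mlp_layers, fully_connected) are assumed to be defined
# elsewhere in this repo; shapes follow the docstring (BxNx3 in, Bxnum_class out).
model = pointnet2_cls_ssg(num_class=40, num_points=1024, num_dim=3)
model.compile(optimizer='adam', loss='categorical_crossentropy',
              metrics=['accuracy'])
model.summary()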
def post_convblock(x):
    y = layers.Conv2D(filters=32, kernel_size=(5, 5), padding="valid",
                      activation="relu")(x)
    y = layers.Conv2D(filters=2, kernel_size=(5, 5), padding="valid",
                      activation='tanh')(y)
    y = layers.Softmax(axis=-1)(y)
    return y
def _build_model(self):
    return tf.keras.Sequential([
        layers.Dense(100, batch_input_shape=(None, self.max_turns,
                                             self.code_length + 2)),
        layers.Dense(80),
        layers.Flatten(),
        layers.Dense(32),
        layers.Reshape((self.code_length, self.colors_amount)),
        layers.Softmax()
    ])
def _last_layer(output, last_layer):
    if last_layer == 'softmax':
        output = layers.Softmax(axis=-1)(output)
    elif last_layer == 'sigmoid':
        output = layers.Activation(activation='sigmoid')(output)
    elif last_layer == 'relu':
        output = layers.Activation(activation='relu')(output)
    elif last_layer == 'leaky_relu':
        output = layers.LeakyReLU()(output)
    elif last_layer == 'prelu':
        output = layers.PReLU()(output)
    return output
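# Illustration of _last_layer: it only wraps the tensor when last_layer names
# a known activation, and passes it through otherwise. A minimal sketch
# assuming `from tensorflow.keras import layers`.
logits = layers.Input(shape=(10,))
probs = _last_layer(logits, 'softmax')    # softmax over the last axis
scores = _last_layer(logits, 'sigmoid')   # element-wise sigmoid
raw = _last_layer(logits, 'linear')       # unrecognized key: returned unchanged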
def __init__(self, h, sym_count, batch_size, z_dim, alpha):
    self.h, self.sym_count, self.batch_size = h, sym_count, batch_size
    inputs = keras.Input((self.h, self.sym_count))
    q_lstm = layers.LSTM(alpha, return_sequences=True)(inputs)
    q_lstm = layers.LSTM(alpha)(q_lstm)
    z_mean = layers.Dense(z_dim)(q_lstm)
    z_log_std = layers.Dense(z_dim)(q_lstm)

    def sampling(args):
        z_mean, z_log_std = args
        epsilon = K.random_normal(shape=(batch_size, z_dim))
        # Reparameterization trick. The KL term below treats z_log_std as a
        # log-variance, so the standard deviation is exp(0.5 * z_log_std);
        # the original multiplied epsilon by z_log_std directly, which is a bug.
        return z_mean + K.exp(0.5 * z_log_std) * epsilon

    z = layers.Lambda(sampling, output_shape=(z_dim, ))([z_mean, z_log_std])
    p_repeat = layers.RepeatVector(self.h)(z)
    p_lstm = layers.LSTM(alpha, return_sequences=True)(p_repeat)
    p_lstm = layers.LSTM(alpha, return_sequences=True)(p_lstm)
    p_lstm = layers.LSTM(self.sym_count)(p_lstm)
    p_output = layers.Softmax()(p_lstm)

    inputs_last = layers.Lambda(lambda x: x[:, -1])(inputs)
    nll = layers.Lambda(
        lambda args: K.categorical_crossentropy(args[0], args[1]))(
            [inputs_last, p_output])

    def x_loss(x_true, x_pred):
        # return K.mean(losses.mse(x_true, x_pred), axis=1)
        return losses.categorical_crossentropy(x_true[:, -1], x_pred)

    def kl_loss(x_true, x_pred):
        return -0.5 * K.mean(1 + z_log_std - K.square(z_mean) - K.exp(z_log_std))

    def vae_loss(x_true, x_pred):
        return x_loss(x_true, x_pred) + kl_loss(x_true, x_pred)

    self.model = keras.Model(inputs=[inputs], outputs=p_output)
    self.model.compile(loss=vae_loss, optimizer=keras.optimizers.Adam(),
                       metrics=[x_loss, kl_loss])
    self.model.summary()
    self.nll_model = keras.Model(inputs=[inputs], outputs=nll)
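# Standalone numpy sketch of the reparameterization step used in `sampling`
# above, treating z_log_std as a log-variance (which is what the KL term
# assumes). Names here are illustrative, not part of the model.
import numpy as np

def reparameterize(z_mean, z_log_var, rng=np.random.default_rng(0)):
    # z = mu + sigma * eps, with sigma = exp(0.5 * log_var)
    eps = rng.standard_normal(z_mean.shape)
    return z_mean + np.exp(0.5 * z_log_var) * eps

z = reparameterize(np.zeros((4, 8)), np.zeros((4, 8)))
print(z.shape)  # (4, 8): unit-Gaussian samples when mean=0, log_var=0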
def _network4():
    optimizer = optimizers.Adadelta(lr=2)
    model = Sequential()
    # output 44, 6
    model.add(layers.Conv1D(filters=30, kernel_size=70, strides=2,
                            activation='relu'))
    # output 11, 3
    if NO_FEATURES == 308:
        model.add(layers.MaxPool1D(120, 2))
    if NO_FEATURES == 300:
        model.add(layers.MaxPool1D(116, 2))
    model.add(layers.Dense(units=3, activation='softmax'))
    model.add(layers.Dropout(0.5))
    model.add(layers.Dense(units=3, activation='relu'))
    model.add(layers.Softmax())
    model.compile(loss=losses.categorical_crossentropy, optimizer=optimizer,
                  metrics=['accuracy'])

    [favorites_dataset, retweets_dl] = _transform_to_dataloader()
    x_train, y_train = favorites_dataset[0]
    x_validation, y_validation = favorites_dataset[1]
    x_train = np.expand_dims(x_train, axis=2)
    y_train = np.expand_dims(to_categorical(y_train, num_classes=3), axis=1)
    x_validation = np.expand_dims(x_validation, axis=2)
    y_validation = np.expand_dims(to_categorical(y_validation, num_classes=3),
                                  axis=1)

    x = model.fit(x_train, y_train, validation_split=0.3, epochs=EPOCHS,
                  batch_size=BATCH_SIZE, verbose=1, shuffle=True,
                  callbacks=[es])
def head(x, cfg):
    # x: (batch_size=1, rois, pool_height, pool_width, channels)
    x = layers.TimeDistributed(layers.Conv2D(cfg.FC_LAYERS, (7, 7)),
                               name='flatten_conv1')(x)
    x = layers.TimeDistributed(layers.BatchNormalization(),
                               name='flatten_bn1')(x)
    x = layers.TimeDistributed(layers.ReLU())(x)
    x = layers.TimeDistributed(layers.Conv2D(cfg.FC_LAYERS, (1, 1)),
                               name='flatten_conv2')(x)
    x = layers.TimeDistributed(layers.BatchNormalization(),
                               name='flatten_bn2')(x)
    x = layers.TimeDistributed(layers.ReLU())(x)
    shared = layers.Lambda(lambda x: tf.squeeze(tf.squeeze(x, 3), 2),
                           name='flatten')(x)

    logits2 = layers.TimeDistributed(layers.Dense(cfg.N_CLASSES), name='fc1')(shared)
    scores2 = layers.TimeDistributed(layers.Softmax())(logits2)

    delta2 = layers.TimeDistributed(layers.Dense(4 * cfg.N_CLASSES), name='fc2')(shared)
    delta2 = layers.Reshape((-1, cfg.N_CLASSES, 4))(delta2)
    return logits2, delta2, scores2
def make_classifier(num_points=2048, feature_dims=0,
                    n_centroid=[256, 128, 64, 32]):
    '''
    Create a classifier with a votenet backbone.
    This is not aiming at a highly accurate classifier,
    only at validating the backbone.
    '''
    input = keras.Input((num_points, 3 + feature_dims))
    xyz, features, idx = votenet_backbone(input, feature_dims=feature_dims,
                                          n_centroid=n_centroid)
    # make a quick and dirty classifier to validate votenet_backbone
    global_features = layers.MaxPool1D(pool_size=n_centroid[1])(features)
    net = layers.Flatten()(global_features)  # B, 256
    net = layers.Dense(128, use_bias=False)(net)
    net = layers.BatchNormalization()(net)
    net = layers.ReLU()(net)
    net = layers.Dropout(0.7)(net)
    net = layers.Dense(40, use_bias=True)(net)
    net = layers.Softmax()(net)
    return keras.Model(inputs=input, outputs=net)
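# Hedged usage sketch: votenet_backbone is assumed to be defined elsewhere in
# this repo. With the defaults above the classifier expects xyz-only clouds of
# 2048 points and emits 40-way class probabilities.
clf = make_classifier(num_points=2048, feature_dims=0)
clf.compile(optimizer='adam', loss='categorical_crossentropy',
            metrics=['accuracy'])
clf.summary()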
def attention_lstm():
    input = layers.Input(shape=(lookback // step, data.shape[-1]))
    encoder = layers.LSTM(32, return_sequences=True)(input)
    # attention
    attention_pre = layers.Dense(1, name='activation_vec')(encoder)
    attention_probs = layers.Softmax()(attention_pre)
    attention_mul = layers.Lambda(lambda x: x[0] * x[1])(
        [attention_probs, encoder])
    decoder = layers.LSTM(32, return_sequences=True)(attention_mul)
    output = layers.Flatten()(decoder)
    output = layers.Dense(1)(output)

    model = Model(inputs=input, outputs=output)
    model.summary()
    model.compile(optimizer=RMSprop(), loss='mae', metrics=['acc'])
    history = model.fit_generator(train_gen,
                                  steps_per_epoch=500,
                                  epochs=40,
                                  validation_data=val_gen,
                                  validation_steps=val_steps)
    model.save('mul_LSTM_model')
    return history
def rational_multi_model(I=5, K=5, J=5):
    from keras import backend
    input = layers.Input(shape=(2, ), name='input')
    sigmoid = layers.Dense(K, activation='sigmoid', name='sigmoid')(input)
    # NOTE: the first positional argument of layers.Softmax is `axis`, so this
    # applies a softmax along axis I, not a softmax of width I.
    left = layers.Softmax(I, name='softmax')(sigmoid)
    single_models = []
    for j in range(J):
        single_models.append(rational_model_v2(J=J, asLayer=True)(input))
    right = layers.Concatenate(name='single_models')(single_models)
    mult = layers.Multiply(name='combine')([left, right])
    out = layers.Lambda(lambda x: backend.sum(x, axis=1),
                        output_shape=(1, ), name='sum')(mult)
    model = models.Model(inputs=[input], outputs=out)
    model.save_weights(paths['weights'])
    if not onCluster:
        plot_model(model, to_file='model-diagrams/rational_multi.png')
    model.compile(optimizer='sgd', loss='mse', metrics=['mae'])
    return model
def auto_det_model(args, anchors, catlen):
    [input, x, tlist] = FCN(args)
    relu_layers = []  # renamed from `list` to avoid shadowing the builtin
    for layer in tlist:
        if "re_lu" in layer.name:
            relu_layers.append(layer)
    # print(relu_layers)
    print("Maps connected to target, from %d to %d" % (args.minmap, args.maxmap))
    maps = []
    for i in range(args.autonconv - 1, len(relu_layers), args.autonconv):
        if (min(relu_layers[i].shape[1], relu_layers[i].shape[2]) >= args.minmap) and \
           (max(relu_layers[i].shape[1], relu_layers[i].shape[2]) <= args.maxmap):
            print(relu_layers[i].name, relu_layers[i].shape[1], "x", relu_layers[i].shape[2])
            maps.append(relu_layers[i])

    depth = anchors * catlen
    ks = 3
    outs = []
    outm = []
    for m in maps:
        x = layers.Conv2D(depth, kernel_size=(ks, ks), strides=(1, 1),
                          padding='same')(m)
        outm.append(x)
        x = layers.Reshape((-1, catlen))(x)
        x = layers.Softmax(axis=2)(x)
        outs.append(x)
    output = layers.concatenate(outs, axis=1)
    model = models.Model(inputs=[input], outputs=output)
    return model, outm
def retain(ARGS):
    '''Create the model'''

    # Define the constant for model saving
    reshape_size = ARGS.emb_size + ARGS.numeric_size
    if ARGS.allow_negative:
        embeddings_constraint = FreezePadding()
        beta_activation = 'tanh'
        output_constraint = None
    else:
        embeddings_constraint = FreezePadding_Non_Negative()
        beta_activation = 'sigmoid'
        output_constraint = non_neg()

    # Get available gpus, returns empty list if none
    glist = get_available_gpus()

    def reshape(data):
        '''Reshape the context vectors to 3D vector'''
        return K.reshape(x=data, shape=(K.shape(data)[0], 1, reshape_size))

    # Code Input
    codes = L.Input((None, None), name='codes_input')
    inputs_list = [codes]
    # Calculate embedding for each code and sum them to a visit level
    codes_embs_total = L.Embedding(ARGS.num_codes + 1,
                                   ARGS.emb_size,
                                   name='embedding',
                                   embeddings_constraint=embeddings_constraint)(codes)
    codes_embs = L.Lambda(lambda x: K.sum(x, axis=2))(codes_embs_total)
    # Numeric input if needed
    if ARGS.numeric_size:
        numerics = L.Input((None, ARGS.numeric_size), name='numeric_input')
        inputs_list.append(numerics)
        full_embs = L.concatenate([codes_embs, numerics], name='catInp')
    else:
        full_embs = codes_embs

    # Apply dropout on inputs
    full_embs = L.Dropout(ARGS.dropout_input)(full_embs)

    # Time input if needed
    if ARGS.use_time:
        time = L.Input((None, 1), name='time_input')
        inputs_list.append(time)
        time_embs = L.concatenate([full_embs, time], name='catInp2')
    else:
        time_embs = full_embs

    # Setup Layers
    # This implementation uses Bidirectional LSTM instead of reverse order
    # (see https://github.com/mp2893/retain/issues/3 for more details)
    # If training on GPU and Tensorflow use CuDNNLSTM for much faster training
    if glist:
        alpha = L.Bidirectional(L.CuDNNLSTM(ARGS.recurrent_size, return_sequences=True),
                                name='alpha')
        beta = L.Bidirectional(L.CuDNNLSTM(ARGS.recurrent_size, return_sequences=True),
                               name='beta')
    else:
        alpha = L.Bidirectional(L.LSTM(ARGS.recurrent_size, return_sequences=True,
                                       implementation=2), name='alpha')
        beta = L.Bidirectional(L.LSTM(ARGS.recurrent_size, return_sequences=True,
                                      implementation=2), name='beta')

    alpha_dense = L.Dense(1, kernel_regularizer=l2(ARGS.l2))
    beta_dense = L.Dense(ARGS.emb_size + ARGS.numeric_size,
                         activation=beta_activation,
                         kernel_regularizer=l2(ARGS.l2))

    # Compute alpha, visit attention
    alpha_out = alpha(time_embs)
    alpha_out = L.TimeDistributed(alpha_dense, name='alpha_dense_0')(alpha_out)
    alpha_out = L.Softmax(axis=1)(alpha_out)
    # Compute beta, codes attention
    beta_out = beta(time_embs)
    beta_out = L.TimeDistributed(beta_dense, name='beta_dense_0')(beta_out)
    # Compute context vector based on attentions and embeddings
    c_t = L.Multiply()([alpha_out, beta_out, full_embs])
    c_t = L.Lambda(lambda x: K.sum(x, axis=1))(c_t)
    # Reshape to 3d vector for consistency between Many to Many and Many to One implementations
    contexts = L.Lambda(reshape)(c_t)

    # Make a prediction
    contexts = L.Dropout(ARGS.dropout_context)(contexts)
    output_layer = L.Dense(1, activation='sigmoid', name='dOut',
                           kernel_regularizer=l2(ARGS.l2),
                           kernel_constraint=output_constraint)
    # TimeDistributed is used for consistency
    # between Many to Many and Many to One implementations
    output = L.TimeDistributed(output_layer, name='time_distributed_out')(contexts)

    # Define the model with appropriate inputs
    model = Model(inputs=inputs_list, outputs=[output])
    return model
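# Numpy sketch of how RETAIN's two attentions combine into the context vector
# c_t computed above (illustrative shapes only): alpha weights whole visits,
# beta gates individual embedding dimensions.
import numpy as np

rng = np.random.default_rng(0)
T, d = 5, 8                                        # visits, embedding size
v = rng.standard_normal((T, d))                    # visit embeddings (full_embs)
alpha = rng.random((T, 1))
alpha /= alpha.sum()                               # visit attention, sums to 1
beta = np.tanh(rng.standard_normal((T, d)))        # per-dimension gates
c_t = (alpha * beta * v).sum(axis=0)               # context vector, shape (d,)
print(c_t.shape)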
def build_model(max_encoder_seq_length, max_decoder_seq_length,
                num_encoder_tokens, num_decoder_tokens, latent_dim):
    # Build the encoder.
    encoder_inputs = layers.Input(shape=(max_encoder_seq_length,
                                         num_encoder_tokens))
    encoder = layers.GRU(latent_dim, return_sequences=True, return_state=True)
    encoder_outputs, state_h = encoder(encoder_inputs)

    # Build the decoder.
    decoder_inputs = layers.Input(shape=(max_decoder_seq_length,
                                         num_decoder_tokens))
    decoder = layers.GRU(latent_dim, return_sequences=True, return_state=True)
    decoder_outputs, _ = decoder(decoder_inputs, initial_state=state_h)

    # Attention mechanism.
    repeat_d_layer = RepeatVectorLayer(max_encoder_seq_length, 2)
    repeat_d = repeat_d_layer(decoder_outputs)

    repeat_e_layer = RepeatVectorLayer(max_decoder_seq_length, 1)
    repeat_e = repeat_e_layer(encoder_outputs)

    concat_for_score_layer = layers.Concatenate(axis=-1)
    concat_for_score = concat_for_score_layer([repeat_d, repeat_e])

    dense1_t_score_layer = layers.Dense(latent_dim // 2, activation='tanh')
    dense1_score_layer = layers.TimeDistributed(dense1_t_score_layer)
    dense1_score = dense1_score_layer(concat_for_score)

    dense2_t_score_layer = layers.Dense(1)
    dense2_score_layer = layers.TimeDistributed(dense2_t_score_layer)
    dense2_score = dense2_score_layer(dense1_score)
    dense2_score = layers.Reshape(
        (max_decoder_seq_length, max_encoder_seq_length))(dense2_score)

    softmax_score_layer = layers.Softmax(axis=-1)
    softmax_score = softmax_score_layer(dense2_score)

    repeat_score_layer = RepeatVectorLayer(latent_dim, 2)
    repeat_score = repeat_score_layer(softmax_score)

    permute_e = layers.Permute((2, 1))(encoder_outputs)
    repeat_e_layer = RepeatVectorLayer(max_decoder_seq_length, 1)
    repeat_e = repeat_e_layer(permute_e)

    attended_mat_layer = layers.Multiply()
    attended_mat = attended_mat_layer([repeat_score, repeat_e])

    context_layer = layers.Lambda(lambda x: K.sum(x, axis=-1),
                                  lambda x: tuple(x[:-1]))
    context = context_layer(attended_mat)

    concat_context_layer = layers.Concatenate(axis=-1)
    concat_context = concat_context_layer([context, decoder_outputs])

    attention_dense_output_layer = layers.Dense(latent_dim, activation='tanh')
    attention_output_layer = layers.TimeDistributed(
        attention_dense_output_layer)
    attention_output = attention_output_layer(concat_context)

    decoder_dense = layers.Dense(num_decoder_tokens, activation='softmax')
    decoder_outputs = decoder_dense(attention_output)

    # Build the model.
    model = models.Model([encoder_inputs, decoder_inputs], decoder_outputs)
    model.compile(optimizer='rmsprop', loss='categorical_crossentropy',
                  metrics=['acc'])
    model.summary()
    return model
def get_RAEwSC_and_ExogenousVars_w_input_w_clf(params, embedding_size=2048,
                                               im_height=495, im_width=436,
                                               channels=3, length_seq_in=3,
                                               length_seq_out=3,
                                               dropout_enc=0.05, dropout_dec=0.05,
                                               b_norm_enc=True, b_norm_dec=True,
                                               out_clf=5):
    """
    Recurrent Autoencoder with Skip Connections (RAEwSC)
    multiple outputs & TimeDistributed: https://github.com/keras-team/keras/issues/6449
    multiple inputs & TimeDistributed:
    """
    # define inputs
    img_input_shape = (im_height, im_width, channels)
    seq_input_shape = (length_seq_in,) + img_input_shape
    prev_frames = layers.Input(shape=seq_input_shape, name='prev_frames')
    seq_output_shape = (length_seq_out,) + img_input_shape
    future_frames = layers.Input(shape=seq_output_shape, name='future_frames')
    clf_shape = (im_height, im_width, out_clf)

    # define encoder/decoder models for a single frame
    encoder, decoder = get_encoder_decoder_w_input(embedding_size, im_height,
                                                   im_width, channels,
                                                   dropout_enc=dropout_enc,
                                                   dropout_dec=dropout_dec,
                                                   b_norm_enc=b_norm_enc,
                                                   b_norm_dec=b_norm_dec)

    #################################################### Encoder Phase
    ####### PAST
    # get embeddings for each frame in the sequence
    embeddings = []
    for i in range(length_seq_in):
        # slice & encode the current frame
        current_frame = Lambda(lambda x: x[:, i],
                               output_shape=(im_height, im_width, channels))(prev_frames)
        h, c5, c4, c3, c2, c1, c0 = encoder(current_frame)
        # append embeddings, adding the sequence dimension to later concatenate them
        h = Reshape((1, embedding_size))(h)
        embeddings.append(h)
    embeddings = concatenate(embeddings, axis=1, name='Concat_embeddings')
    print("embeddings.shape:", embeddings.shape)

    ####### FUTURE
    # encode future frames to guide the recurrent-manifold construction
    future_embeddings = []
    for i in range(length_seq_out):
        # slice & encode the current frame
        current_fut_frame = Lambda(lambda x: x[:, i],
                                   output_shape=(im_height, im_width, channels))(future_frames)
        h, _, _, _, _, _, _ = encoder(current_fut_frame)
        # append embeddings, adding the sequence dimension to later concatenate them
        h = Reshape((1, embedding_size))(h)
        future_embeddings.append(h)
    future_embeddings = concatenate(future_embeddings, axis=1, name='Concat_future_emb')
    print("future_embeddings.shape:", future_embeddings.shape)

    #################################################### Exogenous Vars Phase: day_input, time_input
    # time-flow inputs
    day_input_shape = (length_seq_in, (1 + length_seq_out) * 50)
    time_input_shape = (length_seq_in, (1 + length_seq_out) * 2)
    day_input = layers.Input(shape=day_input_shape, name='day_info')
    time_input = layers.Input(shape=time_input_shape, name='time_info')
    # weather inputs
    weather_categorical_input_shape = (length_seq_in, (1 + length_seq_out) * 28)
    weather_continuous_input_shape = (length_seq_in, (1 + length_seq_out) * 5)
    weather_categorical_input = layers.Input(shape=weather_categorical_input_shape,
                                             name='weather_categorical')
    weather_continuous_input = layers.Input(shape=weather_continuous_input_shape,
                                            name='weather_continuous')
    # embedding for categorical data
    # weather_categorical_input = layers.TimeDistributed(layers.Dense(params['embed_weather'], name='embed_weather'))(weather_categorical_input)

    # concat visual and exogenous variables & combine with a FC layer
    embeddings = concatenate([embeddings, day_input, time_input,
                              weather_categorical_input, weather_continuous_input],
                             axis=-1, name='Concat_exogenous')
    print("concatenation of all inputs:", embeddings.shape)
    embeddings = layers.TimeDistributed(
        layers.Dense(params['units_before_recurrent'], activation=ACTIVATION,
                     name='embedding_FC'))(embeddings)
    print("FC before recurrent embeddings.shape:", embeddings.shape)

    #################################################### Recurrent Phase
    # time encoder
    embeddings = layers.GRU(params['gru_enc_1'], activation='tanh',
                            return_sequences=True, name='gru_enc_1_FC')(embeddings)
    embeddings = layers.GRU(params['gru_enc_2'], activation='tanh',
                            return_sequences=False, name='gru_enc_2')(embeddings)
    embeddings = RepeatVector(length_seq_out, name='repeat_vector')(embeddings)
    # time decoder
    embeddings = layers.GRU(params['gru_dec_1'], activation='tanh',
                            return_sequences=True, name='gru_dec_1')(embeddings)
    embeddings = layers.GRU(params['gru_dec_2'], activation='tanh',
                            return_sequences=True, name='gru_dec_2')(embeddings)
    embeddings = layers.GRU(embedding_size, activation='tanh',
                            return_sequences=True, name='gru_dec_3')(embeddings)
    print("recurrent embeddings.shape:", embeddings.shape)

    #################################################### Decoder Phase
    # decode each predicted frame in the sequence (using skip connections from
    # the most recent known frame and its input)
    predicted_frames = []
    clfs = []
    for i in range(length_seq_out):
        # slice & decode the current frame
        current_embedding = Lambda(lambda x: x[:, i],
                                   output_shape=(embedding_size,))(embeddings)
        current_pred_frame, clf = decoder([current_embedding, c5, c4, c3, c2,
                                           c1, c0, current_frame])
        # append frames, adding the sequence dimension to later concatenate them
        current_pred_frame = Reshape((1,) + img_input_shape)(current_pred_frame)
        predicted_frames.append(current_pred_frame)
        clf = Reshape((1,) + clf_shape)(clf)
        clfs.append(clf)
    predicted_frames = concatenate(predicted_frames, axis=1,
                                   name='Concat_predicted_frames')
    print("predicted_frames.shape:", predicted_frames.shape)
    clfs = concatenate(clfs, axis=1, name='Concat_clfs')

    # prepare the clfs for the loss
    clfs = layers.Reshape((-1, out_clf))(clfs)       # vectorize all frames
    clfs = layers.Softmax(name='softmax_clf')(clfs)  # apply softmax
    print("clfs.shape:", clfs.shape)

    return Model(inputs=[prev_frames, future_frames, day_input, time_input,
                         weather_categorical_input, weather_continuous_input],
                 outputs=[predicted_frames, clfs],
                 name='RAE_w_SC_WT_I'), embeddings, future_embeddings
def build_model(char_size=27, dim=64, iterations=4, training=True, ilp=False,
                pca=False):
    """Build the model."""
    # Inputs
    # Context: (rules, preds, chars,)
    context = L.Input(shape=(None, None, None,), name='context', dtype='int32')
    query = L.Input(shape=(None,), name='query', dtype='int32')

    if ilp:
        context, query, templates = ilp

    # Contextual embedding of symbols
    onehot_weights = np.eye(char_size)
    onehot_weights[0, 0] = 0  # Clear zero index
    onehot = L.Embedding(char_size, char_size,
                         trainable=False,
                         weights=[onehot_weights],
                         name='onehot')
    embedded_ctx = onehot(context)  # (?, rules, preds, chars, char_size)
    embedded_q = onehot(query)  # (?, chars, char_size)

    if ilp:
        # Combine the templates with the context, (?, rules+temps, preds, chars, char_size)
        embedded_ctx = L.Lambda(lambda xs: K.concatenate(xs, axis=1),
                                name='template_concat')([templates, embedded_ctx])
        # embedded_ctx = L.concatenate([templates, embedded_ctx], axis=1)

    embed_pred = ZeroGRU(dim, go_backwards=True, name='embed_pred')
    embedded_predq = embed_pred(embedded_q)  # (?, dim)
    # For every rule, for every predicate, embed the predicate
    embedded_ctx_preds = NestedTimeDist(NestedTimeDist(embed_pred, name='nest1'),
                                        name='nest2')(embedded_ctx)
    # (?, rules, preds, dim)

    embed_rule = ZeroGRU(dim, name='embed_rule')
    embedded_rules = NestedTimeDist(embed_rule, name='d_embed_rule')(embedded_ctx_preds)
    # (?, rules, dim)

    # Reused layers over iterations
    repeat_toctx = L.RepeatVector(K.shape(embedded_ctx)[1], name='repeat_to_ctx')
    diff_sq = L.Lambda(lambda xy: K.square(xy[0] - xy[1]),
                       output_shape=(None, dim), name='diff_sq')
    mult = L.Multiply()
    concat = L.Lambda(lambda xs: K.concatenate(xs, axis=2),
                      output_shape=(None, dim * 5), name='concat')
    att_densel = L.Dense(dim // 2, activation='tanh', name='att_densel')
    att_dense = L.Dense(1, name='att_dense')
    squeeze2 = L.Lambda(lambda x: K.squeeze(x, 2), name='squeeze2')
    softmax1 = L.Softmax(axis=1)
    unifier = NestedTimeDist(ZeroGRU(dim, go_backwards=False, name='unifier'),
                             name='dist_unifier')
    dot11 = L.Dot((1, 1))

    # Reasoning iterations
    state = embedded_predq
    repeated_q = repeat_toctx(embedded_predq)
    outs = list()
    for _ in range(iterations):
        # Compute attention between rule and query state
        ctx_state = repeat_toctx(state)  # (?, rules, dim)
        s_s_c = diff_sq([ctx_state, embedded_rules])
        s_m_c = mult([embedded_rules, state])  # (?, rules, dim)
        sim_vec = concat([s_s_c, s_m_c, ctx_state, embedded_rules, repeated_q])
        sim_vec = att_densel(sim_vec)  # (?, rules, dim//2)
        sim_vec = att_dense(sim_vec)  # (?, rules, 1)
        sim_vec = squeeze2(sim_vec)  # (?, rules)
        sim_vec = softmax1(sim_vec)
        outs.append(sim_vec)

        # Unify every rule and weighted sum based on attention
        new_states = unifier(embedded_ctx_preds, initial_state=[state])
        # (?, rules, dim)
        state = dot11([sim_vec, new_states])

    # Prediction
    out = L.Dense(1, activation='sigmoid', name='out')(state)

    if ilp:
        return outs, out
    elif pca:
        model = Model([context, query], [embedded_rules])
    elif training:
        model = Model([context, query], [out])
        model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['acc'])
    else:
        model = Model([context, query], outs + [out])
    return model
def DxCM(ARGS):
    reshape_size = ARGS.emb_size
    beta_activation = 'relu'

    def reshape(data):
        """Reshape the context vectors to 3D vector"""
        return K.reshape(x=data, shape=(K.shape(data)[0], 1, reshape_size))

    # Code Input
    diag = L.Input((None, None), name='diag_input')
    proc = L.Input((None, None), name='proc_input')
    time = L.Input((None, None), name='time_input')
    # claim_sup = L.Input((None, None), name='claim_sup')
    inputs_list = [diag, proc, time]

    # Calculate embedding for each code and sum them to a visit level
    diag_embs_total = L.Embedding(ARGS.num_diag + 1, ARGS.emb_size,
                                  name='diag_embedding',
                                  weights=[np.load(ARGS.pretrained_diag_embedding,
                                                   allow_pickle=True)],
                                  trainable=False,
                                  # embeddings_constraint=embeddings_constraint
                                  )(diag)
    proc_embs_total = L.Embedding(ARGS.num_proc + 1, ARGS.emb_size,
                                  name='proc_embedding')(proc)  # no constraint
    time_embs_total = L.Embedding(ARGS.num_time + 1, ARGS.emb_size,
                                  name='time_embedding')(time)  # no constraint

    diag_embs = L.Lambda(lambda x: K.sum(x, axis=2))(diag_embs_total)
    proc_embs = L.Lambda(lambda x: K.sum(x, axis=2))(proc_embs_total)
    time_embs = L.Lambda(lambda x: K.sum(x, axis=2))(time_embs_total)

    diag_embs = L.Dropout(ARGS.dropout_input)(diag_embs)
    proc_embs = L.Dropout(ARGS.dropout_input)(proc_embs)
    time_embs = L.Dropout(ARGS.dropout_input)(time_embs)

    full_embs = L.concatenate([diag_embs, proc_embs, time_embs], name='full_embs')

    alpha = L.Bidirectional(L.CuDNNLSTM(ARGS.recurrent_size, return_sequences=True),
                            name='alpha')
    beta = L.Bidirectional(L.CuDNNLSTM(ARGS.recurrent_size, return_sequences=True),
                           name='beta')

    alpha_dense = L.Dense(1, kernel_regularizer=l2(ARGS.l2))
    beta_dense = L.Dense(ARGS.emb_size + ARGS.numeric_size,
                         activation=beta_activation,
                         kernel_regularizer=l2(ARGS.l2))

    alpha_out = alpha(full_embs)
    alpha_out = L.TimeDistributed(alpha_dense, name='alpha_dense_0')(alpha_out)
    alpha_out = L.Softmax(axis=1)(alpha_out)

    beta_out = beta(full_embs)
    beta_out = L.TimeDistributed(beta_dense, name='beta_dense_0')(beta_out)

    c_t = L.Multiply()([alpha_out, beta_out, diag_embs])
    c_t = L.Lambda(lambda x: K.sum(x, axis=1))(c_t)

    contexts = L.Lambda(reshape)(c_t)
    contexts = L.Dropout(ARGS.dropout_context)(contexts)

    output_layer = L.Dense(1, activation=None, name='dOut',
                           kernel_initializer=initializers.RandomUniform(0, 1000),
                           kernel_regularizer=l2(ARGS.l2))
    output = L.TimeDistributed(output_layer, name='time_distributed_out')(contexts)

    model = Model(inputs=inputs_list, outputs=[output])
    return model
def build_model(char_size=27, dim=64, iterations=4, training=True, ilp=False,
                pca=False):
    """Build the model."""
    # Inputs
    # Context: (rules, preds, chars,)
    context = L.Input(shape=(None, None, None,), name='context', dtype='int32')
    query = L.Input(shape=(None,), name='query', dtype='int32')

    # Flatten preds to embed entire rules
    var_flat = L.Lambda(lambda x: K.reshape(
        x, K.stack([K.shape(x)[0], -1, K.prod(K.shape(x)[2:])])),
        name='var_flat')
    flat_ctx = var_flat(context)  # (?, rules, preds*chars)

    # Onehot embedding of symbols
    onehot_weights = np.eye(char_size)
    onehot_weights[0, 0] = 0  # Clear zero index
    onehot = L.Embedding(char_size, char_size,
                         trainable=False,
                         weights=[onehot_weights],
                         name='onehot')
    embedded_ctx = onehot(flat_ctx)  # (?, rules, preds*chars*char_size)
    embedded_q = onehot(query)  # (?, chars, char_size)

    # Embed predicates
    embed_pred = ZeroGRU(dim, go_backwards=True, return_sequences=True,
                         return_state=True, name='embed_pred')
    embedded_predqs, embedded_predq = embed_pred(embedded_q)  # (?, chars, dim)
    embed_pred.return_sequences = False
    embed_pred.return_state = False
    # Embed every rule
    embedded_rules = L.TimeDistributed(embed_pred, name='rule_embed')(embedded_ctx)
    # (?, rules, dim)

    # Reused layers over iterations
    concatm1 = L.Concatenate(name='concatm1')
    repeat_toqlen = L.RepeatVector(K.shape(embedded_q)[1], name='repeat_toqlen')
    mult_cqi = L.Multiply(name='mult_cqi')
    dense_cqi = L.Dense(dim, name='dense_cqi')
    dense_cais = L.Dense(1, name='dense_cais')
    squeeze2 = L.Lambda(lambda x: K.squeeze(x, 2), name='squeeze2')
    softmax1 = L.Softmax(axis=1, name='softmax1')
    dot11 = L.Dot((1, 1), name='dot11')
    repeat_toctx = L.RepeatVector(K.shape(context)[1], name='repeat_toctx')
    memory_dense = L.Dense(dim, name='memory_dense')
    kb_dense = L.Dense(dim, name='kb_dense')
    mult_info = L.Multiply(name='mult_info')
    info_dense = L.Dense(dim, name='info_dense')
    mult_att_dense = L.Multiply(name='mult_att_dense')
    read_att_dense = L.Dense(1, name='read_att_dense')
    mem_info_dense = L.Dense(dim, name='mem_info_dense')
    stack1 = L.Lambda(lambda xs: K.stack(xs, 1), output_shape=(None, dim),
                      name='stack1')
    mult_self_att = L.Multiply(name='mult_self_att')
    self_att_dense = L.Dense(1, name='self_att_dense')
    misa_dense = L.Dense(dim, use_bias=False, name='misa_dense')
    mi_info_dense = L.Dense(dim, name='mi_info_dense')
    add_mip = L.Lambda(lambda xy: xy[0] + xy[1], name='add_mip')
    control_gate = L.Dense(1, activation='sigmoid', name='control_gate')
    gate2 = L.Lambda(lambda xyg: xyg[2] * xyg[0] + (1 - xyg[2]) * xyg[1],
                     name='gate')

    # Init control and memory
    zeros_like = L.Lambda(K.zeros_like, name='zeros_like')
    memory = embedded_predq  # (?, dim)
    control = zeros_like(memory)  # (?, dim)
    pmemories, pcontrols = [memory], [control]

    # Reasoning iterations
    outs = list()
    for i in range(iterations):
        # Control Unit
        qi = L.Dense(dim, name='qi' + str(i))(embedded_predq)  # (?, dim)
        cqi = dense_cqi(concatm1([control, qi]))  # (?, dim)
        cais = dense_cais(mult_cqi([repeat_toqlen(cqi), embedded_predqs]))  # (?, qlen, 1)
        cais = squeeze2(cais)  # (?, qlen)
        cais = softmax1(cais)  # (?, qlen)
        outs.append(cais)
        new_control = dot11([cais, embedded_predqs])  # (?, dim)

        # Read Unit
        info = mult_info([repeat_toctx(memory_dense(memory)),
                          kb_dense(embedded_rules)])  # (?, rules, dim)
        infop = info_dense(concatm1([info, embedded_rules]))  # (?, rules, dim)
        rai = read_att_dense(mult_att_dense([repeat_toctx(new_control), infop]))  # (?, rules, 1)
        rai = squeeze2(rai)  # (?, rules)
        rai = softmax1(rai)  # (?, rules)
        outs.append(rai)
        read = dot11([rai, embedded_rules])  # (?, dim)

        # Write Unit
        mi_info = mem_info_dense(concatm1([read, memory]))  # (?, dim)
        past_ctrls = stack1(pcontrols)  # (?, i+1, dim)
        sai = self_att_dense(
            mult_self_att([L.RepeatVector(i + 1)(new_control), past_ctrls]))  # (?, i+1, 1)
        sai = squeeze2(sai)  # (?, i+1)
        sai = softmax1(sai)  # (?, i+1)
        outs.append(sai)
        past_mems = stack1(pmemories)  # (?, i+1, dim)
        misa = L.dot([sai, past_mems], (1, 1), name='misa_' + str(i))  # (?, dim)
        mip = add_mip([misa_dense(misa), mi_info_dense(mi_info)])  # (?, dim)
        cip = control_gate(new_control)  # (?, 1)
        outs.append(cip)
        new_memory = gate2([mip, memory, cip])  # (?, dim)

        # Update state
        pcontrols.append(new_control)
        pmemories.append(new_memory)
        memory, control = new_memory, new_control

    # Output Unit
    out = L.Dense(1, activation='sigmoid',
                  name='out')(concatm1([embedded_predq, memory]))

    if training:
        model = Model([context, query], out)
        model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['acc'])
    else:
        model = Model([context, query], outs + [out])
    return model
def make_model_from_dp(js):
    shape = tuple(js['inputs'][0]['shape'][1:])
    blobs = {}
    input_layer = kr.Input(shape=shape, name=js['inputs'][0]['name'])
    blobs[js['inputs'][0]['name']] = input_layer
    for oper in js['operators']:
        tp = oper['type']
        inp = oper['inputs']
        out = oper['outputs']
        op = oper.get('options', {})
        if 'activation' in op:
            if op['activation'] == 'relu6':
                # workaround for missing layer - not important for benchmarks
                op['activation'] = 'relu'
        if tp == 'Convolution2D':
            ker, pad = _get_ker_pad(op)
            if pad * 2 + 1 != ker:
                padding = 'valid'
                prep = layers.ZeroPadding2D(padding=op['pad'])
            else:
                padding = 'same'
                prep = None
            activation = op.get('activation')
            if op.get('groups', 1) == op['channels_out'] and (
                    op['groups'] == op.get('channels_in', op['channels_out'])):
                l = layers.DepthwiseConv2D(kernel_size=op['kernel'],
                                           strides=op['stride'],
                                           padding=padding,
                                           activation=activation,
                                           use_bias=op['bias'])
            elif op.get('groups', 1) == 1:
                l = layers.Convolution2D(filters=op['channels_out'],
                                         kernel_size=op['kernel'],
                                         strides=op.get('stride', 1),
                                         dilation_rate=op.get('dilate', 1),
                                         padding=padding,
                                         activation=activation,
                                         use_bias=op['bias'])
            else:
                raise Exception('Unsupported groups count')
            if prep:
                blobs[out[0]] = l(prep(blobs[inp[0]]))
            else:
                blobs[out[0]] = l(blobs[inp[0]])
        elif tp == 'Pooling2D':
            ker, pad = _get_ker_pad(op)
            if pad * 2 + 1 != ker:
                padding = 'valid'
                prep = layers.ZeroPadding2D(padding=op['pad'])
            else:
                padding = 'same'
                prep = None
            mode = op.get('mode', 'max')
            if mode == 'max':
                l = layers.MaxPooling2D(pool_size=op['kernel'],
                                        strides=op['stride'], padding=padding)
            else:
                l = layers.AveragePooling2D(pool_size=op['kernel'],
                                            strides=op['stride'], padding=padding)
            if prep:
                blobs[out[0]] = l(prep(blobs[inp[0]]))
            else:
                blobs[out[0]] = l(blobs[inp[0]])
        elif tp == 'BatchNorm':
            affine = op.get('affine', True)
            eps = op.get('eps', 1e-5)
            momentum = op.get('momentum', 0.1)
            l = layers.BatchNormalization(epsilon=eps, momentum=(1 - momentum),
                                          scale=affine, center=affine)
            blobs[out[0]] = l(blobs[inp[0]])
        elif tp == 'Activation':
            blobs[out[0]] = layers.Activation(activation=op['activation'])(blobs[inp[0]])
        elif tp == 'Elementwise' and op.get('operation', 'sum') == 'sum':
            act = op.get('activation', None)
            if act:
                blobs[out[0]] = layers.Activation(activation=op['activation'])(
                    layers.Add()([blobs[inp[0]], blobs[inp[1]]]))
            else:
                blobs[out[0]] = layers.Add()([blobs[inp[0]], blobs[inp[1]]])
        elif tp == 'GlobalPooling' and op.get('mode', 'max') == 'avg':
            blobs[out[0]] = layers.GlobalAveragePooling2D()(blobs[inp[0]])
        elif tp == 'InnerProduct':
            if len(blobs[inp[0]].shape.dims) != 2:
                in_name = inp[0] + "_flatten"
                blobs[in_name] = layers.Flatten()(blobs[inp[0]])
            else:
                in_name = inp[0]
            blobs[out[0]] = layers.Dense(op['outputs'],
                                         use_bias=op.get('bias', True),
                                         activation=op.get('activation'))(blobs[in_name])
        elif tp == 'Softmax':
            blobs[out[0]] = layers.Softmax()(blobs[inp[0]])
        elif tp == 'SoftmaxWithLoss':
            blobs[out[0]] = blobs[inp[0]]
        else:
            raise Exception("Unsupported layer %s/%s" % (tp, json.dumps(op)))
        print("Output Shape", blobs[out[0]])
    return kr.Model(input_layer, blobs[js['outputs'][0]], name='amodel')
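# Minimal example of the descriptor dict this parser appears to expect.
# Field names are inferred from the code above — a sketch, not a documented
# schema.
js = {
    "inputs": [{"name": "data", "shape": [1, 224, 224, 3]}],
    "outputs": ["prob"],
    "operators": [
        {"type": "Convolution2D", "inputs": ["data"], "outputs": ["conv1"],
         "options": {"channels_out": 8, "kernel": 3, "pad": 1, "stride": 1,
                     "bias": True, "activation": "relu"}},
        {"type": "GlobalPooling", "inputs": ["conv1"], "outputs": ["pool"],
         "options": {"mode": "avg"}},
        {"type": "InnerProduct", "inputs": ["pool"], "outputs": ["fc"],
         "options": {"outputs": 10, "bias": True}},
        {"type": "Softmax", "inputs": ["fc"], "outputs": ["prob"]},
    ],
}
model = make_model_from_dp(js)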
def build_model(char_size=27, dim=64, iterations=4, training=True, ilp=False,
                pca=False):
    """Build the model."""
    # Inputs
    # Context: (rules, preds, chars,)
    context = L.Input(shape=(None, None, None,), name='context', dtype='int32')
    query = L.Input(shape=(None,), name='query', dtype='int32')

    if ilp:
        context, query, templates = ilp

    print('Found %s texts.' % len(CONTEXT_TEXTS))
    word_index = WORD_INDEX
    print('Found %s unique tokens.' % len(word_index))

    embeddings_index = {}
    GLOVE_DIR = os.path.abspath('.') + "/data/glove"
    f = open(os.path.join(GLOVE_DIR, 'glove.6B.100d.txt'), 'r', encoding='utf-8')
    for line in f:
        values = line.split()
        word = values[0]
        coefs = np.asarray(values[1:], dtype='float32')
        embeddings_index[word] = coefs
    f.close()
    print('Found %s word vectors.' % len(embeddings_index))

    EMBEDDING_DIM = 100
    embedding_matrix = np.zeros((len(word_index) + 1, EMBEDDING_DIM))
    for word, i in word_index.items():
        embedding_vector = embeddings_index.get(word)
        if embedding_vector is not None:
            # words not found in embedding index will be all-zeros.
            embedding_matrix[i] = embedding_vector

    # Contextual embedding of symbols
    # onehot_weights = np.eye(char_size)
    # onehot_weights[0, 0] = 0  # Clear zero index
    # onehot = L.Embedding(char_size, char_size,
    #                      trainable=False,
    #                      weights=[onehot_weights],
    #                      name='onehot')
    embedding_layer = L.Embedding(len(word_index) + 1,
                                  EMBEDDING_DIM,
                                  weights=[embedding_matrix],
                                  trainable=False)
    embedded_ctx = embedding_layer(context)  # (?, rules, preds, chars, char_size)
    embedded_q = embedding_layer(query)  # (?, chars, char_size)

    if ilp:
        # Combine the templates with the context, (?, rules+temps, preds, chars, char_size)
        embedded_ctx = L.Lambda(lambda xs: K.concatenate(xs, axis=1),
                                name='template_concat')([templates, embedded_ctx])
        # embedded_ctx = L.concatenate([templates, embedded_ctx], axis=1)

    embed_pred = ZeroGRU(dim, go_backwards=True, name='embed_pred')
    embedded_predq = embed_pred(embedded_q)  # (?, dim)
    # For every rule, for every predicate, embed the predicate
    embedded_ctx_preds = NestedTimeDist(NestedTimeDist(embed_pred, name='nest1'),
                                        name='nest2')(embedded_ctx)
    # (?, rules, preds, dim)

    embed_rule = ZeroGRU(dim, name='embed_rule')
    embedded_rules = NestedTimeDist(embed_rule, name='d_embed_rule')(embedded_ctx_preds)
    # (?, rules, dim)

    # Reused layers over iterations
    repeat_toctx = L.RepeatVector(K.shape(embedded_ctx)[1], name='repeat_to_ctx')
    diff_sq = L.Lambda(lambda xy: K.square(xy[0] - xy[1]),
                       output_shape=(None, dim), name='diff_sq')
    mult = L.Multiply()
    concat = L.Lambda(lambda xs: K.concatenate(xs, axis=2),
                      output_shape=(None, dim * 5), name='concat')
    att_densel = L.Dense(dim // 2, activation='tanh', name='att_densel')
    att_dense = L.Dense(1, name='att_dense')
    squeeze2 = L.Lambda(lambda x: K.squeeze(x, 2), name='squeeze2')
    softmax1 = L.Softmax(axis=1)
    unifier = NestedTimeDist(ZeroGRU(dim, go_backwards=False, name='unifier'),
                             name='dist_unifier')
    dot11 = L.Dot((1, 1))

    # Reasoning iterations
    state = embedded_predq
    repeated_q = repeat_toctx(embedded_predq)
    outs = list()
    for _ in range(iterations):
        # Compute attention between rule and query state
        ctx_state = repeat_toctx(state)  # (?, rules, dim)
        s_s_c = diff_sq([ctx_state, embedded_rules])
        s_m_c = mult([embedded_rules, state])  # (?, rules, dim)
        sim_vec = concat([s_s_c, s_m_c, ctx_state, embedded_rules, repeated_q])
        sim_vec = att_densel(sim_vec)  # (?, rules, dim//2)
        sim_vec = att_dense(sim_vec)  # (?, rules, 1)
        sim_vec = squeeze2(sim_vec)  # (?, rules)
        sim_vec = softmax1(sim_vec)
        outs.append(sim_vec)

        # Unify every rule and weighted sum based on attention
        new_states = unifier(embedded_ctx_preds, initial_state=[state])
        # (?, rules, dim)
        state = dot11([sim_vec, new_states])

    # Prediction
    out = L.Dense(1, activation='sigmoid', name='out')(state)

    if ilp:
        return outs, out
    elif pca:
        model = Model([context, query], [embedded_rules])
    elif training:
        model = Model([context, query], [out])
        model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['acc'])
    else:
        model = Model([context, query], outs + [out])
    return model
model = ResnetBase((32, 32, 3), activations.relu)
model.add([
    layers.Conv2D(kernel_size=(7, 7), strides=(1, 1), filters=64, padding="same"),
    layers.BatchNormalization(),
    layers.MaxPooling2D(pool_size=(2, 2))
], activation=tf.keras.activations.relu)
model.add_conv_bn_block(filters=[64, 64], strides=(1, 1), kernel_sizes=[(3, 3), (3, 3)])
model.add_conv_bn_block(filters=[64, 64], strides=(1, 1), kernel_sizes=[(3, 3), (3, 3)])
model.add_conv_bn_block(filters=[128, 128], strides=(1, 1), kernel_sizes=[(3, 3), (3, 3)])
model.add_conv_bn_block(filters=[128, 128], strides=(1, 1), kernel_sizes=[(3, 3), (3, 3)])
model.add_conv_bn_block(filters=[256, 256], strides=(2, 2), kernel_sizes=[(3, 3), (3, 3)])
model.add_conv_bn_block(filters=[256, 256], strides=(1, 1), kernel_sizes=[(3, 3), (3, 3)])
model.add_conv_bn_block(filters=[512, 512], strides=(2, 2), kernel_sizes=[(3, 3), (3, 3)])
model.add_conv_bn_block(filters=[512, 512], strides=(1, 1), kernel_sizes=[(3, 3), (3, 3)])
model.add([
    layers.AveragePooling2D(pool_size=(4, 4)),
    layers.Flatten(),
    layers.Dense(10),
    layers.Softmax()
])

m = model.build_model()
m.summary()
m.compile(optimizer="adam", loss="categorical_crossentropy", metrics=['accuracy'])
# history = m.fit(X_train, y_train, epochs=20)