def decoding_layer(dec_input, embeddings, encoder_output, encoder_state, vocab_size, text_length, summary_length, max_summary_length, rnn_size, vocab_to_int, keep_prob, batch_size, num_layers):
    """Build the seq2seq decoder: an LSTM cell with Bahdanau attention over
    the encoder outputs, a shared vocab projection layer, and two decode
    branches (teacher-forced training, greedy inference) sharing weights.

    NOTE(review): `encoder_state` is accepted but never used — the attention
    wrapper's zero_state serves as the decoder's initial state instead.
    """
    # One LSTM cell per decoder layer, each under its own variable scope.
    # NOTE(review): only the cell from the LAST iteration survives the loop
    # and is wrapped below — confirm this matches the original intent.
    for layer in range(num_layers):
        with tf.variable_scope('decoder_{}'.format(layer)):
            lstm = tf.contrib.rnn.LSTMCell(
                rnn_size,
                initializer=tf.random_uniform_initializer(-0.1, 0.1, seed=2))
    # Projects decoder RNN outputs onto the vocabulary.
    output_layer = Dense(
        vocab_size,
        kernel_initializer=tf.truncated_normal_initializer(mean=0.0, stddev=0.1))
    # Additive (Bahdanau) attention, masked by the true input text lengths.
    attention_mechanism = tf.contrib.seq2seq.BahdanauAttention(
        rnn_size,
        encoder_output,
        text_length,
        normalize=False,
        name='BahdanauAttention')
    lstm_attention = tf.contrib.seq2seq.AttentionWrapper(
        lstm, attention_mechanism, rnn_size)
    initial_state = lstm_attention.zero_state(batch_size, tf.float32)
    # Training and inference share variables via the reused 'decode' scope.
    with tf.variable_scope('decode'):
        training_logits = decoding_layer_train(dec_input, summary_length,
                                               lstm_attention, initial_state,
                                               output_layer, vocab_size,
                                               max_summary_length, keep_prob)
    with tf.variable_scope('decode', reuse=True):
        inference_logits = decoding_layer_infer(
            embeddings, vocab_to_int['<GO>'], vocab_to_int['<EOS>'],
            lstm_attention, initial_state, output_layer,
            max_summary_length, batch_size, keep_prob)
    return training_logits, inference_logits
def __init__(self):
    """Assemble the autoencoder: Encoder -> two 1024-unit ReLU
    decompression layers -> Decoder."""
    super(Autoencoder, self).__init__()
    self.encoder = Encoder()
    # Two identical ReLU layers that expand the latent code before decoding.
    for idx in (1, 2):
        setattr(self, 'decompress_%d' % idx,
                Dense(name='d_decompress_%d' % idx, units=1024,
                      activation=tf.nn.relu))
    self.decoder = Decoder()
    # Disabled experiments (kept for reference): VAE-style mu/log_var
    # projection heads and an auxiliary 3-layer classifier with dropout.
def __init__(self):
    """Latent-code discriminator: two (linear -> sigmoid -> dropout)
    blocks of shrinking width, a 128-unit sigmoid bottleneck, and a
    softmax two-way classifier."""
    super(LatentDiscriminator, self).__init__()
    # Each block: a linear layer and a sigmoid layer of the same width,
    # followed by 50% dropout.
    for block, width in ((1, 512), (2, 256)):
        setattr(self, 'fc_%d1' % block,
                Dense(name='ld_fc_%d1' % block, units=width,
                      activation=None, use_bias=True))
        setattr(self, 'fc_%d2' % block,
                Dense(name='ld_fc_%d2' % block, units=width,
                      activation=tf.nn.sigmoid, use_bias=True))
        setattr(self, 'drop_%d' % block,
                Dropout(name='ld_drop_%d' % block, rate=0.5))
    self.fc_3 = Dense(name='ld_fc_3', units=128,
                      activation=tf.nn.sigmoid, use_bias=True)
    self.classifier = Dense(name='ld_classifier', units=2,
                            activation=tf.nn.softmax, use_bias=False)
def NN_stacking(X_train, y_train, X_test, y_test, report=True):
    """Blend base-model predictions with a small neural network.

    Parameters
    ----------
    X_train, y_train : training blend features / targets (5 features expected,
        per the ``input_shape=(5,)`` below)
    X_test, y_test : held-out blend features / targets
    report : when True, print the test RMSE

    Returns
    -------
    (y_test, y_pred) : untouched test targets and the blender's predictions
    """
    # Fix: layer classes live in tensorflow.keras.layers. The old
    # `tensorflow.layers` module is deprecated and removed in TF2.
    from tensorflow.keras.layers import Dense, BatchNormalization
    from tensorflow.keras import Sequential
    from tensorflow import losses
    from tensorflow import set_random_seed

    set_random_seed(rand)  # `rand` is presumably a module-level seed — TODO confirm

    blender = Sequential()
    blender.add(Dense(3, input_shape=(5, )))
    blender.add(BatchNormalization())
    blender.add(Dense(1, activation='relu'))
    blender.compile(optimizer='adam', loss=losses.mean_squared_error)
    blender.fit(np.array(X_train), np.array(y_train), epochs=3000, verbose=0)

    y_pred = blender.predict(np.array(X_test))
    if report:
        print("\n\n========== [Stacking Report: NN stacking] ==========")
        mse = mean_squared_error(y_test, y_pred)
        print("NN Stacking Test Error(RMSE) : ", np.sqrt(mse))
    return y_test, y_pred
def dense(inputs, out_length, use_bias=True):
    """Apply a fully connected layer with Glorot-uniform weights.

    Parameters
    ----------
    inputs: Input tensor
    out_length: Length of outputs
    use_bias: Whether to use bias

    Returns
    -------
    The layer's output tensor.
    """
    # Fix: pass an initializer *instance* rather than the initializer
    # class/function object itself, so the layer receives a ready-to-use
    # initializer instead of an uncalled callable.
    return Dense(
        units=out_length,
        use_bias=use_bias,
        kernel_initializer=init_ops.glorot_uniform_initializer(),
    )(inputs)
def initialize_model(config, num_people, current_layer, is_training):
    """Stack the layers described by ``config.model.layers`` onto
    ``current_layer`` and return the resulting tensor.

    Parameters:
        config: protobuf config whose model.layers list drives construction.
        num_people: unused here; kept for interface compatibility.
        current_layer: input tensor the configured layers are applied to.
        is_training: 1/0 (or bool); multiplied into the dropout rate so
            dropout becomes a no-op at inference time.

    Raises:
        ValueError: for an unsupported pool type, a transfer layer that is
            not first, or an unknown layer kind.
    """
    for count, layer_conf in enumerate(config.model.layers):
        name = get_or_none(layer_conf, "name")
        with tf.variable_scope(layer_conf.scope, reuse=tf.AUTO_REUSE):
            if layer_conf.HasField("convolutional"):
                # Square kernel sized by the larger configured dimension.
                current_layer = relu(
                    conv2d(
                        current_layer,
                        layer_conf.convolutional.filters,
                        max(layer_conf.convolutional.kernel_size.width,
                            layer_conf.convolutional.kernel_size.height),
                        #data_format='channels_last',
                        padding="same",
                        scope="conv"))
            elif layer_conf.HasField("pool"):
                if layer_conf.pool.type == "max":
                    current_layer = MaxPooling2D(
                        (layer_conf.pool.size.width,
                         layer_conf.pool.size.height),
                        strides=(layer_conf.pool.size.width,
                                 layer_conf.pool.size.height),
                        name=name)(current_layer)
                else:
                    # Fix: original referenced the undefined name
                    # `conv_config.pool_type`, which would NameError here.
                    raise ValueError("Unsupported pool type:" +
                                     layer_conf.pool.type)
            elif layer_conf.HasField("dense"):
                current_layer = Dense(layer_conf.dense.units,
                                      activation=str_to_activation(
                                          layer_conf.dense.activation),
                                      name=name)(current_layer)
            elif layer_conf.HasField("flatten"):
                current_layer = Flatten(name=name)(current_layer)
            elif layer_conf.HasField("dropout"):
                # Rate scaled by is_training: dropout off at inference.
                current_layer = Dropout(layer_conf.dropout.rate * is_training,
                                        name=name)(current_layer)
            elif layer_conf.HasField("transfer"):
                if count != 0:
                    # Fix: exception was constructed but never raised.
                    raise ValueError("Transfer layer must occur first.")
                # Transfer layers themselves are handled outside now.
            else:
                # Fix: exception was constructed but never raised.
                raise ValueError("Unsupported layer.")
    return current_layer
def construct_model(self, input_x, input_y):
    """Wire the video-classification graph: Inception stem -> 3D-ResNet
    trunk -> global average pool -> dropout -> 400-way linear head, plus
    loss and top-1/top-5 accuracy ops.

    Args:
        input_x: input tensor fed to the Inception stem.
        input_y: integer class labels.

    Side effects: stores ``fc8``, ``fc8_output``, ``loss``, ``top1_acc``
    and ``top5_acc`` on ``self``.
    """
    ct = self.cnn_trainable
    x = self.inception_part(input_x, ct)
    x = self.resnet_3d_part(x, ct)
    # Global pooling over (time, height, width); the temporal extent
    # follows the configured frame count.
    x = AveragePooling3D(pool_size=(self.frm_num // 4, 7, 7),
                         strides=(1, 1, 1),
                         padding='valid',
                         data_format=self.DATA_FORMAT,
                         name='global_pool')(x)
    x = tf.reshape(x, shape=(-1, 512))
    x = Dropout(0.3, name='dropout')(x)
    self.fc8 = Dense(400, trainable=ct, name='fc8')
    self.fc8_output = self.fc8(x)
    # Fix: use the `input_y` argument consistently instead of
    # `self.input_y`, which this method never assigns.
    self.loss = sparse_softmax_cross_entropy_with_logits(
        logits=self.fc8_output, labels=input_y)
    self.top1_acc = in_top_k(predictions=self.fc8_output,
                             targets=input_y, k=1)
    self.top1_acc = tf.reduce_mean(tf.cast(self.top1_acc, tf.float32),
                                   name='top1_accuracy')
    self.top5_acc = in_top_k(predictions=self.fc8_output,
                             targets=input_y, k=5)
    self.top5_acc = tf.reduce_mean(tf.cast(self.top5_acc, tf.float32),
                                   name='top5_accuracy')
def _fully_connected(input, name):
    """Flatten `input` and project it to a single scalar, with all ops
    placed under a variable scope called `name`."""
    with tf.variable_scope(name):
        flat = Flatten()(input)
        return Dense(1)(flat)
# --- IMDB sentiment classification with a single-layer LSTM ---
max_features = 10000  # vocabulary size kept from the dataset
max_len = 500  # reviews truncated/padded to this many tokens
batch_size = 32  # NOTE(review): shadowed by batch_size=128 in fit() below
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)
print(len(x_train), ' train sequences')
print(len(x_test), 'test sequence')
# Pad/truncate every review to a fixed length for batching.
input_train = preprocessing.sequence.pad_sequences(x_train, maxlen=max_len)
input_test = preprocessing.sequence.pad_sequences(x_test, maxlen=max_len)
# Embedding -> LSTM -> sigmoid binary classifier.
model = Sequential()
model.add(tf.keras.layers.Embedding(max_features, 32))
model.add(tf.keras.layers.LSTM(32))
model.add(Dense(1, activation='sigmoid'))
model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['acc'])
history = model.fit(input_train, y_train, epochs=20, batch_size=128, validation_split=0.2)
import matplotlib.pyplot as plt
# Plot training accuracy over epochs (validation curves collected too).
acc = history.history['acc']
val_acc = history.history['val_acc']
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs = range(len(acc))
plt.plot(epochs, acc, 'bo', label='Train acc')
# One-hot encode the 10-class labels.
new_y_train = keras.utils.to_categorical(y_train, 10)
new_y_test = keras.utils.to_categorical(y_test, 10)
inputs = Input(shape=(new_x_train.shape[1], new_x_train.shape[2]))
# These first layers are 'data augmentation' layers.
x = MyAddScale(name='scale_augment')(inputs)
x = MyAdd2DRotation(name='rotate_augment')(x)
x = MyAddShift(name='shift_augment')(x)
x = MyAddJitter(name='jitter_augment')(x)
# This is the ursa layer to create a feature vector.
x = MyUrsaMin(Nstars, name='cluster')(x)
x = Activation('relu')(x)
x = BatchNormalization()(x)
# These last layers do classification (512 -> 256 -> softmax over 10).
x = Dense(512, activation='relu', name='dense512')(x)
x = BatchNormalization()(x)
x = Dense(256, activation='relu', name='dense256')(x)
x = BatchNormalization()(x)
x = Dropout(rate=0.3)(x)
x = Dense(10, activation='softmax')(x)
model = Model(inputs=inputs, outputs=x)
# Mirror the model across GPUs when more than one is configured.
if gpus > 1:
    from keras.utils import multi_gpu_model
    model = multi_gpu_model(model, gpus=gpus)
rmsprop = tf.keras.optimizers.RMSprop(lr=.001, rho=.9, decay=.0001)
model.compile(optimizer=rmsprop, loss='categorical_crossentropy', metrics=['accuracy'])
import tensorflow as tf
from tensorflow.keras.models import Sequential
# Fixes: layers live in tensorflow.keras.layers (not tensorflow.layers),
# and "Flattem" was a typo for Flatten.
from tensorflow.keras.layers import Dense, Conv2D, Flatten, Dropout, MaxPooling2D

# Small CNN regressor over a 6x7 single-channel board.
# Fix: Sequential takes a *list* of layers — passing them as separate
# positional arguments misassigns the second layer to the `name` parameter.
model = Sequential([
    Conv2D(128, (4, 4), padding='same', activation='relu', input_shape=(6, 7, 1)),
    MaxPooling2D(pool_size=(2, 2)),
    # Fix: collapse spatial dims so the dense head emits one scalar per sample.
    Flatten(),
    Dense(64, activation='relu'),
    Dense(64, activation='relu'),
    Dense(1),
])
optimizer = tf.keras.optimizers.Adam()
# NOTE(review): 'accuracy' is not meaningful for an MSE regression head.
model.compile(loss='mean_squared_error', optimizer=optimizer, metrics=['accuracy'])
def buildModel(self):
    """Build a seq2seq graph: multi-layer BiLSTM encoder, Luong-attention
    LSTM decoder with a teacher-forced training branch and a beam-search
    inference branch, plus loss, clipped-Adam train op and summaries.

    Returns:
        (x, y, x_len, y_len, logits, loss, prediction,
         beam_decoder_result_ids, global_step, train_op, summaries)

    NOTE(review): this source was recovered from a whitespace-collapsed
    file; the extents of the two 'attention' variable scopes were
    reconstructed from the reuse pattern — confirm against the original.
    """
    T_in = self.args.T_in
    T_out = self.args.T_out
    D_in = self.args.D_in
    D_out = self.args.D_out
    E = self.args.embedding_dim
    H = self.args.hidden_dim
    SOS = self.args.SOS
    EOS = self.args.EOS
    PAD = self.args.PAD
    beam_width = 3
    # Input
    with tf.name_scope('input'):
        # (N, T_in) token ids
        x = tf.placeholder(shape=(None, T_in), dtype=tf.int32, name='encoder_inputs')
        # (N, T_out) token ids
        y = tf.placeholder(shape=(None, T_out), dtype=tf.int32, name='decoder_inputs')
        # (N,) true lengths
        x_len = tf.placeholder(shape=(None, ), dtype=tf.int32)
        y_len = tf.placeholder(shape=(None, ), dtype=tf.int32)
        # dynamic sample num
        batch_size = tf.shape(x)[0]
        # symbol columns used to build SOS-/EOS-augmented targets
        sos = tf.ones(shape=(batch_size, 1), dtype=tf.int32) * SOS
        eos = tf.ones(shape=(batch_size, 1), dtype=tf.int32) * EOS
        pad = tf.ones(shape=(batch_size, 1), dtype=tf.int32) * PAD
        # input masks
        x_mask = tf.sequence_mask(x_len, T_in, dtype=tf.float32)
        y_with_sos_mask = tf.sequence_mask(y_len, T_out + 1, dtype=tf.float32)
        y_with_pad = tf.concat([y, pad], axis=1)
        # writes EOS at position y_len of each padded target row
        eos_mask = tf.one_hot(y_len, depth=T_out + 1, dtype=tf.int32) * EOS
        # masked inputs
        y_with_eos = y_with_pad + eos_mask
        y_with_sos = tf.concat([sos, y], axis=1)
    ## Embedding
    with tf.name_scope('embedding'):
        if self.args.use_pretrained:
            # Frozen embeddings loaded from a raw float32 binary file.
            embedding_pretrained = np.fromfile(self.args.pretrained_file,
                                               dtype=np.float32).reshape((-1, E))
            embedding = tf.Variable(embedding_pretrained, trainable=False)
        else:
            embedding = tf.get_variable(name='embedding',
                                        shape=(D_in, E),
                                        dtype=tf.float32,
                                        initializer=xavier_initializer())
        e_x = tf.nn.embedding_lookup(embedding, x)
        e_y = tf.nn.embedding_lookup(embedding, y_with_sos)
        if self.args.mode == 'train':
            e_x = tf.nn.dropout(e_x, self.args.keep_prob)
    ## Encoder
    with tf.name_scope('encoder'):
        ## Multi-BiLSTM
        fw_cell = rnn.MultiRNNCell([
            rnn.BasicLSTMCell(num_units=H)
            for i in range(self.args.layer_size)
        ])
        bw_cell = rnn.MultiRNNCell([
            rnn.BasicLSTMCell(num_units=H)
            for i in range(self.args.layer_size)
        ])
        bi_encoder_output, bi_encoder_state = \
            tf.nn.bidirectional_dynamic_rnn(
                fw_cell, bw_cell, e_x,
                sequence_length=x_len,
                dtype=tf.float32,
                time_major=False,
                scope=None)
        # Sum the forward/backward outputs; keep the forward final state.
        encoder_output = bi_encoder_output[0] + bi_encoder_output[1]
        encoder_final_state = bi_encoder_state[0]
    ## Decoder
    with tf.name_scope('decoder'):
        decoder_cell = rnn.MultiRNNCell([
            rnn.BasicLSTMCell(num_units=H)
            for i in range(self.args.layer_size)
        ])
        # Decode exactly T_out+1 steps for every sample (teacher forcing).
        decoder_lengths = tf.ones(shape=[batch_size],
                                  dtype=tf.int32) * (T_out + 1)
        ## Training decoder
        with tf.variable_scope('attention'):
            attention_mechanism = LuongAttention(
                num_units=H,
                memory=encoder_output,
                memory_sequence_length=x_len,
                name='attention_fn')
            projection_layer = Dense(units=D_out,
                                     kernel_initializer=xavier_initializer())
            train_decoder_cell = AttentionWrapper(
                cell=decoder_cell,
                attention_mechanism=attention_mechanism,
                attention_layer_size=H)
            train_decoder_init_state = train_decoder_cell.zero_state(
                batch_size=batch_size,
                dtype=tf.float32).clone(cell_state=encoder_final_state)
            training_helper = TrainingHelper(e_y, decoder_lengths,
                                             time_major=False)
            train_decoder = BasicDecoder(
                cell=train_decoder_cell,
                helper=training_helper,
                initial_state=train_decoder_init_state,
                output_layer=projection_layer)
            train_decoder_outputs, _, _ = dynamic_decode(
                train_decoder,
                impute_finished=True,
                maximum_iterations=T_out + 1)
            # (N, T_out+1, D_out), layer-normalized
            train_decoder_outputs = ln(train_decoder_outputs.rnn_output)
        ## Beam-search decoder: tile encoder tensors across the beam.
        beam_memory = tile_batch(encoder_output, beam_width)
        beam_memory_state = tile_batch(encoder_final_state, beam_width)
        beam_memory_length = tile_batch(x_len, beam_width)
        with tf.variable_scope('attention', reuse=True):
            beam_attention_mechanism = LuongAttention(
                num_units=H,
                memory=beam_memory,
                memory_sequence_length=beam_memory_length,
                name='attention_fn')
            beam_decoder_cell = AttentionWrapper(
                cell=decoder_cell,
                attention_mechanism=beam_attention_mechanism,
                attention_layer_size=None)
            beam_decoder_init_state = beam_decoder_cell.zero_state(
                batch_size=batch_size * beam_width,
                dtype=tf.float32).clone(cell_state=beam_memory_state)
            start_tokens = tf.ones((batch_size), dtype=tf.int32) * SOS
            beam_decoder = BeamSearchDecoder(
                cell=beam_decoder_cell,
                embedding=embedding,
                start_tokens=start_tokens,
                end_token=EOS,
                initial_state=beam_decoder_init_state,
                beam_width=beam_width,
                output_layer=projection_layer)
            beam_decoder_outputs, _, _ = dynamic_decode(
                beam_decoder,
                scope=tf.get_variable_scope(),
                maximum_iterations=T_out + 1)
            beam_decoder_result_ids = beam_decoder_outputs.predicted_ids
    with tf.name_scope('loss'):
        # Softmax over vocab, then per-token cross-entropy masked to the
        # true target length (+1 for the appended EOS), averaged per sample.
        logits = tf.nn.softmax(train_decoder_outputs)
        cross_entropy = tf.keras.losses.sparse_categorical_crossentropy(
            y_with_eos, logits)
        loss_mask = tf.sequence_mask(y_len + 1, T_out + 1, dtype=tf.float32)
        loss = tf.reduce_sum(cross_entropy * loss_mask) / tf.cast(
            batch_size, dtype=tf.float32)
        prediction = tf.argmax(logits, 2)
    ## train_op
    with tf.name_scope('train'):
        global_step = tf.train.get_or_create_global_step()
        # Noam warmup/decay schedule on the learning rate.
        lr = noam_scheme(self.args.lr, global_step, self.args.warmup_steps)
        optimizer = tf.train.AdamOptimizer(lr)
        ## gradient clips
        trainable_params = tf.trainable_variables()
        gradients = tf.gradients(loss, trainable_params)
        clip_gradients, _ = tf.clip_by_global_norm(
            gradients, self.args.gradient_clip_num)
        train_op = optimizer.apply_gradients(zip(clip_gradients,
                                                 trainable_params),
                                             global_step=global_step)
    # Summary
    with tf.name_scope('summary'):
        tf.summary.scalar('lr', lr)
        tf.summary.scalar('loss', loss)
        tf.summary.scalar('global_step', global_step)
        summaries = tf.summary.merge_all()
    return x, y, x_len, y_len, logits, loss, prediction, beam_decoder_result_ids, global_step, train_op, summaries
def BuildModel(self, modelName):
    """Build a dueling-DQN graph under variable scope `modelName`: three
    conv layers, separate advantage and value streams, recombined into
    per-action Q values that are masked and trained with Huber loss.

    Args:
        modelName: variable-scope and name prefix for every op created.

    Returns:
        ModelInfo bundling the placeholders, masked output, cost and
        optimizer of this model.
    """
    with tf.variable_scope(modelName):
        kernelInit = tf.contrib.layers.xavier_initializer()
        # Stacked-frames input; shape comes from global DdqnGlobals.
        frames = tf.placeholder(dtype=tf.float32,
                                shape=(None, ) + DdqnGlobals.STATE_DIMENSIONS,
                                name=modelName + 'frames')
        conv_1 = self.BuildConv2D(
            ConvArgs(layerInput=frames, numFilters=32, filterSize=8,
                     stride=4, init=kernelInit,
                     namePrefix=modelName + 'c1'), modelName)
        conv_2 = self.BuildConv2D(
            ConvArgs(layerInput=conv_1, numFilters=64, filterSize=4,
                     stride=2, init=kernelInit,
                     namePrefix=modelName + 'c2'), modelName)
        conv_3 = self.BuildConv2D(
            ConvArgs(layerInput=conv_2, numFilters=64, filterSize=3,
                     stride=1, init=kernelInit,
                     namePrefix=modelName + 'c3'), modelName)
        conv_flattened = tf.layers.Flatten()(conv_3)
        # Split into dueling networks
        xavierInit = tf.contrib.layers.xavier_initializer()
        # ADVANTAGE stream: 512-unit hidden layer -> one value per action.
        advantageInput = Dense(units=512,
                               activation='relu',
                               kernel_initializer=kernelInit,
                               name=modelName + 'advantageInput')(conv_flattened)
        advantage = Dense(self.action_size,
                          activation='relu',
                          kernel_initializer=xavierInit,
                          name=modelName + 'advantage')(advantageInput)
        # VALUE stream: 512-unit hidden layer -> single state value.
        valueInput = Dense(units=512,
                           activation='relu',
                           kernel_initializer=kernelInit,
                           name=modelName + 'valueInput')(conv_flattened)
        value = Dense(1,
                      kernel_initializer=xavierInit,
                      name=modelName + 'value')(valueInput)
        # Rejoin: Q = V + (A - mean(A)), the dueling aggregation.
        advantageDiff = tf.subtract(
            advantage, tf.reduce_mean(advantage, axis=1, keepdims=True))
        policy = advantageDiff + value
        # "The output layer is a fully-connected linear layer
        # with a single output for each valid action."
        rawOutput = Dense(self.action_size,
                          kernel_initializer=kernelInit,
                          name=modelName + 'rawOutput')(policy)
        # Finally, multiply the output by the action mask so only
        # selected/valid actions contribute to the loss.
        actionsMask = tf.placeholder(dtype=tf.float32,
                                     shape=((None, self.action_size)),
                                     name=modelName + 'actionsMask')
        filteredOutput = tf.multiply(rawOutput, actionsMask)
        targetQ = tf.placeholder(dtype=tf.float32,
                                 shape=((None, self.action_size)),
                                 name=modelName + 'targetQ')
        cost = tf.losses.huber_loss(targetQ, filteredOutput)
        optimizer = tf.train.AdamOptimizer(
            learning_rate=self.Params.learning_rate).minimize(cost)
        modelInfo = ModelInfo(modelName=modelName,
                              frames=frames,
                              actionsMask=actionsMask,
                              filteredOutput=filteredOutput,
                              targetQ=targetQ,
                              cost=cost,
                              optimizer=optimizer)
        return modelInfo
# Shared layer instances for a small CNN over a 6x7 (Connect-Four) board.
NUM_CHANNELS = 64
BN1 = BatchNormalization()
BN2 = BatchNormalization()
BN3 = BatchNormalization()
BN4 = BatchNormalization()
BN5 = BatchNormalization()
BN6 = BatchNormalization()
# Two 'same'-padded conv layers followed by two unpadded ones.
CONV1 = Conv2D(NUM_CHANNELS, kernel_size=3, strides=1, padding='same')
CONV2 = Conv2D(NUM_CHANNELS, kernel_size=3, strides=1, padding='same')
CONV3 = Conv2D(NUM_CHANNELS, kernel_size=3, strides=1)
CONV4 = Conv2D(NUM_CHANNELS, kernel_size=3, strides=1)
FC1 = Dense(128)
FC2 = Dense(64)
FC3 = Dense(7)  # one logit per board column
DROP1 = Dropout(0.3)
DROP2 = Dropout(0.3)
# 6x7 input
# https://github.com/PaddlePaddle/PARL/blob/0915559a1dd1b9de74ddd2b261e2a4accd0cd96a/benchmark/torch/AlphaZero/submission_template.py#L496
def modified_cnn(inputs, **kwargs):
    """Apply the shared conv/BN stack to `inputs`.

    NOTE(review): the body is truncated in this chunk — only the first
    two conv+BN+relu layers are visible here.
    """
    relu = tf.nn.relu
    log_softmax = tf.nn.log_softmax
    layer_1_out = relu(BN1(CONV1(inputs)))
    layer_2_out = relu(BN2(CONV2(layer_1_out)))
def __init__(self):
    """Image discriminator: three conv groups (64/128/256 filters), each
    ending in max-pool + dropout, then a flattened 3-layer classifier
    head emitting 2 logits."""
    super(Discriminator, self).__init__()
    shared = {'activation': tf.nn.relu, 'padding': 'same'}
    # (group, filters, first-conv kernel, first-conv strides, pool size);
    # the second and third convs of every group are 3x3 stride-1.
    groups = (
        (1, 64, (5, 5), (2, 2), (5, 5)),
        (2, 128, (3, 3), (1, 1), (3, 3)),
        (3, 256, (3, 3), (2, 2), (3, 3)),
    )
    for g, filters, first_kernel, first_stride, pool in groups:
        setattr(self, 'conv_%d1' % g,
                Conv2D(name='di_conv_%d1' % g, filters=filters,
                       kernel_size=first_kernel, strides=first_stride,
                       **shared))
        setattr(self, 'conv_%d2' % g,
                Conv2D(name='di_conv_%d2' % g, filters=filters,
                       kernel_size=(3, 3), strides=(1, 1), **shared))
        setattr(self, 'conv_%d3' % g,
                Conv2D(name='di_conv_%d3' % g, filters=filters,
                       kernel_size=(3, 3), strides=(1, 1), **shared))
        setattr(self, 'pool_%d' % g,
                MaxPooling2D(name='di_pool_%d' % g, pool_size=pool,
                             strides=(2, 2), padding='same'))
        setattr(self, 'drop_%d' % g, Dropout(0.5))
    # Classifier head.
    self.flattener = Flatten()
    self.drop_4 = Dropout(0.5)
    self.classifier_1 = Dense(name='di_cls_1', units=512,
                              activation=tf.nn.relu, use_bias=True)
    self.drop_5 = Dropout(0.5)
    self.classifier_2 = Dense(name='di_cls_2', units=256,
                              activation=tf.nn.relu, use_bias=True)
    self.classifier_3 = Dense(name='di_cls_3', units=2,
                              activation=None, use_bias=True)
def __init__(self):
    """Image encoder: three conv blocks (64/128/256 filters), each with
    a side 'compress' branch (avg-pool -> flatten -> 128-unit linear)
    plus batch-norm and dropout."""
    super(Encoder, self).__init__()
    shared = {'activation': tf.nn.relu, 'padding': 'same'}
    # (group, filters, conv kernel, conv strides, max-pool size,
    #  avg-pool size, avg-pool strides)
    blocks = (
        (1, 64, 7, (2, 2), 4, 5, (3, 3)),
        (2, 128, 5, (1, 1), 4, 5, (3, 3)),
        (3, 256, 3, (1, 1), 2, 3, (1, 1)),
    )
    for g, filters, kernel, stride, pool, avg_pool, avg_stride in blocks:
        setattr(self, 'conv_%d1' % g,
                Conv2D(name='e_conv_%d1' % g, filters=filters,
                       kernel_size=kernel, strides=stride, **shared))
        setattr(self, 'conv_%d2' % g,
                Conv2D(name='e_conv_%d2' % g, filters=filters,
                       kernel_size=kernel, strides=stride, **shared))
        setattr(self, 'pool_%d' % g,
                MaxPooling2D(name='e_pool_%d' % g, pool_size=pool,
                             strides=(2, 2), padding='same'))
        # Side branch: condense the block's feature map into a 128-dim code.
        # (An L2 activity regularizer on the Dense was tried and disabled.)
        setattr(self, 'compress_%d1' % g,
                AveragePooling2D(name='e_comp_%d1' % g, pool_size=avg_pool,
                                 strides=avg_stride, padding='same'))
        setattr(self, 'compress_%d2' % g, Flatten())
        setattr(self, 'compress_%d3' % g,
                Dense(name='e_comp_%d3' % g, units=128, activation=None,
                      use_bias=False))
        setattr(self, 'batch_norm_%d' % g,
                BatchNormalization(name='e_bn_%d' % g))
        setattr(self, 'drop_%d' % g,
                Dropout(name='e_drop_%d' % g, rate=0.5))