def inference(self):
    """
    Build the inference pipeline: encode the story and query, run the
    memory cells over the story, and produce the distribution logits
    over possible answers.
    """
    # --- Story encoder: embed tokens, mask, then sum within each sentence ---
    story_emb = tf.nn.embedding_lookup(self.E, self.S)           # [None, story_len, sent_len, embed_sz]
    story_emb = tf.multiply(story_emb, self.story_mask)          # [None, story_len, sent_len, embed_sz]
    self.story_embeddings = tf.reduce_sum(story_emb, axis=[2])   # [None, story_len, embed_sz]

    # --- Query encoder: same bag-of-words scheme over a single sentence ---
    query_emb = tf.nn.embedding_lookup(self.E, self.Q)           # [None, sent_len, embed_sz]
    query_emb = tf.multiply(query_emb, self.query_mask)          # [None, sent_len, embed_sz]
    self.query_embedding = tf.reduce_sum(query_emb, axis=[1])    # [None, embed_sz]

    # dynamic_rnn needs the true (unpadded) length of every story.
    self.length = self.get_sequence_length()

    # --- Memory cell, unrolled over the story sentences ---
    self.cell = DynamicMemoryCell(self.num_blocks, self.embedding_size,
                                  self.keys, self.query_embedding)
    init_state = self.cell.zero_state(self.batch_size, dtype=tf.float32)
    self.out, memories = tf.nn.dynamic_rnn(self.cell, self.story_embeddings,
                                           sequence_length=self.length,
                                           initial_state=init_state)

    # --- Output module: attend over the final memory slots with the query ---
    # `memories` holds the blocks concatenated along axis 1; split them back
    # out and stack into [None, num_blocks, embed_sz].
    mem_stack = tf.stack(tf.split(memories, self.num_blocks, 1), 1)
    scores = softmax(tf.reduce_sum(tf.multiply(mem_stack, tf.expand_dims(self.query_embedding, 1)),
                                   axis=[2]))                    # [None, mem_slots]
    # Shift by the row max before the softmax for numerical stability
    # (softmax is shift invariant).
    scores_max = tf.reduce_max(scores, axis=-1, keep_dims=True)
    attn = tf.nn.softmax(scores - scores_max)
    attn = tf.expand_dims(attn, 2)                               # [None, mem_slots, 1]

    # Attention-weighted combination of the memories.
    weighted_mem = tf.reduce_sum(tf.multiply(mem_stack, attn), axis=1)  # [None, embed_sz]

    # --- Final transforms => logits ---
    hidden = self.activation(tf.matmul(weighted_mem, self.H) + tf.squeeze(self.query_embedding))  # [None, embed_sz]
    return tf.matmul(hidden, self.R)
def __neural_network_model(self, user_class_weight):
    """Build a two-layer fully-connected classifier with a class-weighted loss.

    Args:
        user_class_weight: weight multiplier applied to samples of the first
            class (the second class keeps weight 1.0); used to counter class
            imbalance in the loss.

    Returns:
        dict mapping 'X', 'Y', 'output', 'accuracy', 'loss' to the
        corresponding placeholders/tensors of the built graph.
    """
    # Fixed message: the original said "convolutional network", but the graph
    # below is a plain two-layer MLP (12 -> 64 -> 2) — no convolutions.
    print('Building fully-connected network...')

    X = tf.placeholder(tf.float32, [None, 12], name="X")
    Y = tf.placeholder(tf.float32, [None, 2], name="Y")

    # Two dense layers, L2-regularized; MLP2 outputs raw logits.
    MLP1 = fully_connected(X, 64, activation=tf.nn.elu, regularizer="L2", name="MLP1")
    MLP2 = fully_connected(MLP1, 2, activation='linear', regularizer="L2", name="MLP2")
    output = softmax(MLP2)

    accuracy = tf.reduce_mean(
        tf.cast(tf.equal(tf.argmax(output, 1), tf.argmax(Y, 1)), tf.float32),
        name='acc')

    # Per-class weights: class 0 gets user_class_weight, class 1 gets 1.0.
    class_weights = tf.constant([[user_class_weight, 1.0]])
    # Deduce each sample's weight from its one-hot true label.
    weights = tf.reduce_sum(class_weights * Y, axis=1)
    # Unweighted cross entropy on the raw logits (MLP2), not on `output` —
    # the op applies softmax internally.
    unweighted_losses = tf.nn.softmax_cross_entropy_with_logits_v2(
        labels=Y, logits=MLP2)
    # Apply the weights via broadcasting, then average to the final loss.
    weighted_losses = unweighted_losses * weights
    loss = tf.reduce_mean(weighted_losses)

    return {
        'X': X,
        'Y': Y,
        'output': output,
        'accuracy': accuracy,
        'loss': loss
    }
def inference(self):
    """
    Build the inference pipeline: story and question are encoded, run
    through the memory cells, and mapped to logits over possible answers.
    """
    # Story encoder: embedded, masked, summed into one vector per sentence.
    s_emb = tf.nn.embedding_lookup(self.E, self.S)      # [None, story_len, sent_len, embed_sz]
    s_emb = tf.multiply(s_emb, self.story_mask)         # [None, story_len, sent_len, embed_sz]
    s_emb = tf.reduce_sum(s_emb, axis=[2])              # [None, story_len, embed_sz]

    # Query encoder: same bag-of-words scheme over a single sentence.
    q_emb = tf.nn.embedding_lookup(self.E, self.Q)      # [None, sent_len, embed_sz]
    q_emb = tf.multiply(q_emb, self.query_mask)         # [None, sent_len, embed_sz]
    q_emb = tf.reduce_sum(q_emb, axis=[1])              # [None, embed_sz]

    # Run the story through the memory cell; only final memories are used.
    init_state = self.cell.zero_state(self.bsz, dtype=tf.float32)
    _, memories = tf.nn.dynamic_rnn(self.cell, s_emb,
                                    sequence_length=self.S_len,
                                    initial_state=init_state)

    # Output module: score every memory slot against the query.
    mem_stack = tf.stack(memories, axis=1)
    slot_scores = softmax(tf.reduce_sum(tf.multiply(mem_stack,               # [None, mem_slots]
                                                    tf.expand_dims(q_emb, 1)),
                                        axis=[2]))
    # Shift by the row max for numerical stability (softmax is shift invariant).
    row_max = tf.reduce_max(slot_scores, axis=-1, keep_dims=True)
    attn = tf.expand_dims(tf.nn.softmax(slot_scores - row_max), 2)           # [None, mem_slots, 1]

    # Attention-weighted combination of the memories.
    combined = tf.reduce_sum(tf.multiply(mem_stack, attn), axis=1)           # [None, embed_sz]

    # Output transformations => logits.
    hidden = prelu(tf.matmul(combined, self.H) + q_emb)                      # [None, embed_sz]
    return tf.matmul(hidden, self.R)                                         # [None, vocab_sz]
def CRNN(window=1500,nLabels=3,downsampleSecond=True,featureMap=False):
    """Build a 1-D residual TFLearn network over fixed-length windows.

    Args:
        window: number of input samples per example (input shape [None, window, 1]).
        nLabels: number of output classes for the final softmax layer.
        downsampleSecond: forwarded to each residual block; selects which of
            the two conv layers in a block the downsampling check targets.
        featureMap: if True, also return the intermediate activations
            (res1..res4) alongside the final regression layer.

    Returns:
        The tflearn regression layer, or (res1, res2, res3, res4, net) when
        featureMap is True.
    """
    #Residual block function adapted from TFLearn:
    #https://github.com/tflearn/tflearn/blob/master/tflearn/layers/conv.py
    def residual_block_1D(incoming,out_channels,downsample=False, first=False, filt_len=16, dropout_prob=0.85, downsampleSecond=True):
        # Pre-activation residual block: (BN -> relu -> dropout -> conv) x2
        # plus a max-pooled, zero-padded identity shortcut.
        resnet = incoming
        in_channels = incoming.shape[-1].value  # TF1-style static channel count
        strides = (2 if downsample else 1)
        dsLayer = (1 if downsampleSecond else 0)  # which conv index the check below matches
        identity = resnet
        nConv = 2
        if first:
            # First block of the network: apply the conv directly, then run
            # only one BN/relu/dropout/conv round in the loop below.
            resnet = conv_1d(resnet, out_channels, filt_len, strides,weights_init="variance_scaling")
            nConv = 1
        for i in range(nConv):
            resnet = batch_normalization(resnet)
            resnet = relu(resnet)
            resnet = dropout(resnet, dropout_prob)
            if downsample and i==dsLayer: #1 as in, second layer
                # NOTE(review): the branch matching dsLayer uses strides=1, so the
                # strided (downsampling) conv is the OTHER layer — the original
                # comment ("puts the downsampling on the first conv layer only")
                # describes the net effect for downsampleSecond=True; confirm intent.
                resnet = conv_1d(resnet,out_channels,filt_len, strides=1, weights_init="variance_scaling") #puts the downsampling on the first conv layer only
            else:
                resnet = conv_1d(resnet,out_channels,filt_len, strides, weights_init="variance_scaling")
        #Beginning of skip connection: pool identity to match any downsampling.
        identity = max_pool_1d(identity,strides, strides)
        if in_channels != out_channels:
            # Zero-pad the channel dimension so the shortcut can be added.
            ch = (out_channels - in_channels) // 2
            identity = tf.pad(identity,[[0,0],[0,0],[ch,ch]])
            in_channels = out_channels
        resnet = resnet + identity
        return resnet

    #Begin construction of network
    net = input_data(shape=[None, window, 1])
    net = conv_1d(net, 64, 16, weights_init="variance_scaling")
    net = batch_normalization(net)
    net = relu(net)
    dropoutProb = 0.5
    net = residual_block_1D(net, 64, first=True, dropout_prob=dropoutProb)
    # Four groups of residual blocks; every even-indexed block downsamples,
    # and the width factor k steps up as (i+1)//4 + 1.
    for i in range(0,4):
        downsample = (i%2 == 0)
        k = ((i+1)//4)+1
        net = residual_block_1D(net, 64*k, downsample, downsampleSecond=downsampleSecond, dropout_prob=dropoutProb)
    res1 = net  # intermediate activation after group 1
    for i in range(4, 8):
        downsample = (i%2 == 0)
        k = ((i+1)//4)+1
        net = residual_block_1D(net, 64*k, downsample, downsampleSecond=downsampleSecond, dropout_prob=dropoutProb)
    res2 = net  # intermediate activation after group 2
    for i in range(8,12):
        downsample = (i%2 == 0)
        k = ((i+1)//4)+1
        net = residual_block_1D(net, 64*k, downsample, downsampleSecond=downsampleSecond, dropout_prob=dropoutProb)
    res3 = net  # intermediate activation after group 3
    # NOTE(review): last group runs 3 blocks (12..14), not 4 — presumably
    # intentional (15 residual blocks total); confirm against the paper/model.
    for i in range(12, 15):
        downsample = (i%2 == 0)
        k = ((i+1)//4)+1
        net = residual_block_1D(net, 64*k, downsample, downsampleSecond=downsampleSecond, dropout_prob=dropoutProb)
    res4 = net  # intermediate activation after group 4
    net = batch_normalization(net)
    net = relu(net)
    net = fully_connected(net, nLabels)
    net = softmax(net)
    net = regression(net, optimizer='adam',loss='categorical_crossentropy',learning_rate=0.001, shuffle_batches=False)
    #Return intermediary activations
    if featureMap:
        return res1, res2, res3, res4, net
    else:
        return net
# Convolutional network building net = input_data(shape=[None, 32, 32, 3], data_preprocessing=img_prep, data_augmentation=img_aug) filters = [64, 128, 256, 512] for f in filters: net = fractal_conv2d(net, 4, f, 3, normalizer_fn=batch_normalization) net = slim.max_pool2d(net, 2, 2) net = fractal_conv2d(net, 4, 512, 2, normalizer_fn=batch_normalization) net = conv_2d(net, 10, 1) net = global_avg_pool(net) net = softmax(net) net = regression(net, optimizer='adam', loss='categorical_crossentropy', learning_rate=.002) # Train using classifier model = tflearn.DNN(net, tensorboard_verbose=0) model.fit(X, Y, n_epoch=400, shuffle=True, validation_set=(X_test, Y_test), show_metric=True, batch_size=32,
# Real-time data augmentation: horizontal flips plus small random rotations.
img_aug.add_random_flip_leftright()
img_aug.add_random_rotation(max_angle=25.)

# --- Convolutional network building ---
# Fractal CNN over 32x32x3 (CIFAR-10-shaped) inputs.
net = input_data(shape=[None, 32, 32, 3],
                 data_preprocessing=img_prep,
                 data_augmentation=img_aug)

# Four fractal conv stages of increasing width, each followed by a 2x2 max-pool.
filters = [64, 128, 256, 512]
for n_filters in filters:
    net = fractal_conv2d(net, 4, n_filters, 3, normalizer_fn=batch_normalization)
    net = slim.max_pool2d(net, 2, 2)

# Final fractal stage, then a 1x1 conv to 10 class maps and global average
# pooling to collapse the spatial dimensions into per-class scores.
net = fractal_conv2d(net, 4, 512, 2, normalizer_fn=batch_normalization)
net = conv_2d(net, 10, 1)
net = global_avg_pool(net)
net = softmax(net)
net = regression(net, optimizer='adam',
                 loss='categorical_crossentropy',
                 learning_rate=.002)

# --- Train using classifier ---
model = tflearn.DNN(net, tensorboard_verbose=0)
model.fit(X, Y,
          n_epoch=400,
          shuffle=True,
          validation_set=(X_test, Y_test),
          show_metric=True,
          batch_size=32,
          run_id='cifar10_cnn')