def conv_block(input, channels, dropout_flag, dropout_rate, laxer_idx,
               stride_input=1, k_size=3, padding_type='SAME'):
    # Traditional 3D conv layer followed by batch norm and relu activation
    i_size = input.get_shape().as_list()[-2] // stride_input
    weights = ops.weight([k_size, k_size, k_size, channels[0], channels[1]],
                         layer_name='wcnn' + str(laxer_idx + 1),
                         reuse=tf.get_variable_scope().reuse)
    bias = ops.bias([i_size, i_size, i_size, channels[1]],
                    layer_name='bcnn' + str(laxer_idx + 1),
                    reuse=tf.get_variable_scope().reuse)

    conv_output = tf.add(
        ops.conv3d(input, weights,
                   stride=[stride_input, stride_input, stride_input],
                   padding=padding_type),
        bias)
    conv_output = ops.batch_norm(conv_output)
    conv_output = ops.relu(conv_output)

    if dropout_flag:
        conv_output = tf.nn.dropout(conv_output, keep_prob=dropout_rate)

    return conv_output
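# --- Not part of the original source: a minimal sketch of the `ops` helper module
# --- that conv_block/out_block assume (weight, bias, conv3d, batch_norm, relu).
# --- Names and signatures are inferred from the call sites above; the real project
# --- code may differ.
import tensorflow as tf

def weight(shape, layer_name, reuse=None):
    # Xavier-initialized kernel; AUTO_REUSE creates the variable once and fetches
    # it by name on later calls, which is how weight sharing would work here.
    with tf.variable_scope('params', reuse=tf.AUTO_REUSE):
        return tf.get_variable(layer_name, shape,
                               initializer=tf.contrib.layers.xavier_initializer())

def bias(shape, layer_name, reuse=None):
    with tf.variable_scope('params', reuse=tf.AUTO_REUSE):
        return tf.get_variable(layer_name, shape, initializer=tf.zeros_initializer())

def conv3d(x, kernel, stride, padding):
    # tf.nn.conv3d expects a length-5 strides list: [batch, depth, height, width, channel].
    return tf.nn.conv3d(x, kernel, strides=[1] + stride + [1], padding=padding)

def batch_norm(x):
    return tf.contrib.layers.batch_norm(x)

def relu(x):
    return tf.nn.relu(x)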
def out_block(input_anc, input_pos, channels, laxer_idx,
              stride_input=1, k_size=8, padding_type='VALID'):
    # Last conv layer, flatten the output
    weights = ops.weight([k_size, k_size, k_size, channels[0], channels[1]],
                         layer_name='wcnn' + str(laxer_idx + 1))
    bias = ops.bias([1, 1, 1, channels[1]],
                    layer_name='bcnn' + str(laxer_idx + 1))

    conv_output_anc = tf.add(
        ops.conv3d(input_anc, weights,
                   stride=[stride_input, stride_input, stride_input],
                   padding=padding_type),
        bias)
    conv_output_pos = tf.add(
        ops.conv3d(input_pos, weights,
                   stride=[stride_input, stride_input, stride_input],
                   padding=padding_type),
        bias)

    conv_output_anc = ops.batch_norm(conv_output_anc)
    conv_output_pos = ops.batch_norm(conv_output_pos)

    conv_output_anc = tf.contrib.layers.flatten(conv_output_anc)
    conv_output_pos = tf.contrib.layers.flatten(conv_output_pos)

    return conv_output_anc, conv_output_pos
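# --- Not part of the original source: a hypothetical sketch of how conv_block and
# --- out_block could be wired into a Siamese anchor/positive descriptor network.
# --- The patch size (16^3 voxels), channel widths, and placeholder names are
# --- assumptions, not values taken from the original code.
anc_input = tf.placeholder(tf.float32, [None, 16, 16, 16, 1], name='anchor')
pos_input = tf.placeholder(tf.float32, [None, 16, 16, 16, 1], name='positive')

def tower(x):
    # Two conv blocks; the second halves the spatial size to 8^3 so that the final
    # VALID k_size=8 convolution in out_block collapses it to 1^3 before flattening.
    x = conv_block(x, [1, 32], dropout_flag=False, dropout_rate=1.0, laxer_idx=0)
    x = conv_block(x, [32, 64], dropout_flag=True, dropout_rate=0.7, laxer_idx=1,
                   stride_input=2)
    return x

with tf.variable_scope('siamese') as scope:
    anc_feat = tower(anc_input)
    scope.reuse_variables()          # positive branch shares all tower weights
    pos_feat = tower(pos_input)

# Shared output layer producing one flattened descriptor per branch.
desc_anc, desc_pos = out_block(anc_feat, pos_feat, [64, 128], laxer_idx=2, k_size=8)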
def build_model(self, forward_only):
    print("[*] Building a PTRModel math model")
    with tf.variable_scope(self.scope):
        #embedding_matrix = tf.eye(self.input_dim, self.W)
        #embedding_matrix = weight('embedding', [self.input_dim, self.W], init='xavier')
        self.exp = weight('exp', [1, 1], init='constant', value=1.6)
        self.exp_weight = tf.constant([[1.0]], dtype=tf.float32)  #weight('exp_weight', [1, 1])
        self.exp_bias = weight('exp_bias', [1, 1], init='constant', value=0.0)
        self.beta = 1 + tf.nn.softplus(weight('beta', [1, 1]))

        prev_state = self.controller.init_state()
        tf.get_variable_scope().reuse_variables()

        for seq_length in range(1, self.max_length + 1):
            input_1 = tf.placeholder(tf.float32, [self.input_dim],
                                     name='input_1_%s' % seq_length)
            true_output = tf.placeholder(tf.float32, [self.output_dim],
                                         name='true_output_%s' % seq_length)
            self.inputs_1.append(input_1)
            self.true_outputs.append(true_output)

            # present inputs
            prev_state = self.controller.update_memory(prev_state, [
                tf.reshape(input_1, [1, -1]),
                tf.reshape(input_1, [1, -1])
            ])
            self.collect_states[seq_length] = self.collect_states[
                seq_length - 1][0:(seq_length - 1)] + [self.copy_state(prev_state)]

            state = prev_state
            self.prev_states[seq_length] = state

            stops = []
            candidate_outputs = []
            for j in range(self.MAX_STEP):
                state, _ = self.controller(state, j)
                self.collect_states[seq_length].append(self.copy_state(state))
                candidate_outputs.append(tf.unstack(state['M'][-1][0:seq_length]))
                # stops.append(state['stop'])

            self.outputs[seq_length] = candidate_outputs
            self.stops[seq_length] = stops

        if not forward_only:
            for seq_length in range(self.min_length, self.max_length + 1):
                print(" [*] Building a loss model for seq_length %s" % seq_length)

                all_losses = []
                for index in range(self.MAX_STEP):
                    loss = sequence_loss(
                        logits=self.outputs[seq_length][index],
                        targets=self.true_outputs[0:seq_length],
                        weights=[1] * seq_length,
                        average_across_timesteps=False,
                        average_across_batch=False,
                        softmax_loss_function=l2_loss)
                    all_losses.append(loss)
                all_losses = tf.stack(all_losses)

                #step_dist = tf.nn.softmax(tf.concat(self.stops[seq_length], 1))
                #step_dist = tf.nn.softmax(tf.nn.embedding_lookup(self.length2stepdist, [seq_length]))
                #max_pos = tf.to_float(tf.argmax(step_dist))
                max_pos = tf.clip_by_value(
                    self.exp_weight * tf.pow(tf.to_float(seq_length), self.exp) + self.exp_bias,
                    0, self.MAX_STEP - 1)
                stop_pos = D(self.MAX_STEP, max_pos, 1, self.beta)

                loss1 = tf.reduce_sum(
                    tf.expand_dims(all_losses, 0) * stop_pos) + 0.001 * tf.reduce_sum(max_pos)
                self.losses[seq_length] = loss1

                if not self.params:
                    self.params = tf.trainable_variables()

                grads = []
                for grad in tf.gradients(loss1, self.params):  # + self.weight_decay*tf.add_n(tf.get_collection('l2'))
                    if grad is not None:
                        grads.append(tf.clip_by_value(grad, self.min_grad, self.max_grad))
                    else:
                        grads.append(grad)
                self.grads[seq_length] = grads

    with tf.variable_scope("opt", reuse=None):
        if not forward_only:
            for seq_length in range(self.min_length, self.max_length + 1):
                self.optims[seq_length] = self.opt.apply_gradients(
                    zip(self.grads[seq_length], self.params),
                    global_step=self.global_step)

    self.saver = tf.train.Saver()
    print(" [*] Build a PTRModel math model finished")
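# --- Not part of the original source: a plausible sketch of the l2_loss callable
# --- passed to sequence_loss as softmax_loss_function above. The assumption is that
# --- it replaces the usual cross-entropy with a per-step squared error, since the
# --- model regresses real-valued memory rows rather than class ids.
def l2_loss(labels, logits):
    # Both tensors have shape [batch, output_dim]; return one scalar loss per example.
    return tf.reduce_sum(tf.square(logits - labels), axis=-1)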
def build_model(self, forward_only):
    print("[*] Building a PTRModel math model")
    with tf.variable_scope(self.scope):
        self.a = weight('a', [1, 1])
        # self.c = weight('c', [1, 1])
        # self.d = weight('d', [1, 1])
        self.b = weight('b', [1, 1], init='constant')
        self.beta = 1 + tf.nn.softplus(weight('beta', [1, 1]))

        for seq_length in range(1, self.max_length * self.max_length + 1):
            true_output = tf.placeholder(tf.float32, [batch_size, self.output_dim],
                                         name='true_output_%s' % seq_length)
            self.true_outputs.append(true_output)

        prev_state = self.controller.init_state(self.true_outputs[0].get_shape()[0])
        tf.get_variable_scope().reuse_variables()

        for seq_length in range(1, self.max_length + 1):
            input_1 = tf.placeholder(tf.float32, [batch_size, self.input_dim],
                                     name='input_1_%s' % seq_length)
            input_2 = tf.placeholder(tf.float32, [batch_size, self.input_dim],
                                     name='input_2_%s' % seq_length)
            self.inputs_1.append(input_1)
            self.inputs_2.append(input_2)

            # present inputs
            prev_state = self.controller.update_memory(prev_state, [
                tf.reshape(input_1, [batch_size, 1, -1]),
                tf.reshape(input_2, [batch_size, 1, -1]),
                tf.zeros((batch_size, 1, self.W))
            ])
            self.collect_states[seq_length] = self.collect_states[
                seq_length - 1][0:(seq_length - 1)] + [self.copy_state(prev_state)]
            self.debug[seq_length] = []

            state = prev_state
            self.prev_states[seq_length] = state

            candidate_outputs = []
            for j in range(0, self.MAX_STEP):
                state = self.controller(state, j)
                new_state = self.copy_state(state)
                self.collect_states[seq_length].append(new_state)
                # print(state['M'][-1][:, 0:(seq_length*seq_length)].get_shape())
                candidate_outputs.append(
                    tf.unstack(
                        tf.transpose(
                            state['M'][-1][:, 0:(seq_length * seq_length)],
                            [1, 0, 2])))
                # self.debug[seq_length].append((new_state['ptr'], new_state['dptr']))

            self.outputs[seq_length] = candidate_outputs

        if not forward_only:
            for seq_length in range(self.min_length, self.max_length + 1):
                print(" [*] Building a loss model for seq_length %s" % seq_length)
                print(len(self.outputs[seq_length]),
                      len(self.true_outputs[0:seq_length * seq_length]),
                      len([1] * (seq_length * seq_length)))
                # print(self.outputs[seq_length][0].shape, self.true_outputs[0:2*seq_length][0].shape, len([1] * (2*seq_length)))

                all_losses = []
                for index in range(self.MAX_STEP):
                    # print(len(self.outputs[seq_length][index]), self.outputs[seq_length][index][0].get_shape())
                    # print(len(tf.unstack(self.true_outputs[0:seq_length*seq_length])), tf.unstack(self.true_outputs[0:seq_length*seq_length])[0].get_shape())
                    loss = sequence_loss(
                        logits=self.outputs[seq_length][index],
                        targets=tf.unstack(self.true_outputs[0:seq_length * seq_length]),
                        weights=[[1] * batch_size] * seq_length * seq_length,
                        average_across_timesteps=False,
                        average_across_batch=False,
                        softmax_loss_function=l2_loss)
                    all_losses.append(loss)
                all_losses = tf.stack(all_losses)

                cn = tf.pow(tf.to_float(seq_length), self.a) + self.b
                max_pos = tf.clip_by_value(cn, 0, self.MAX_STEP - 1)
                stop_pos = D(self.MAX_STEP, tf.tile(max_pos, [batch_size, 1]), 1, self.beta)

                loss1 = tf.reduce_sum(
                    tf.expand_dims(all_losses, 0) * stop_pos) + 0.0001 * tf.reduce_sum(cn)
                self.losses[seq_length] = loss1

                if not self.params:
                    self.params = tf.trainable_variables()

                grads = []
                for grad in tf.gradients(loss1, self.params):  # + self.weight_decay*tf.add_n(tf.get_collection('l2'))
                    if grad is not None:
                        grads.append(tf.clip_by_value(grad, self.min_grad, self.max_grad))
                    else:
                        grads.append(grad)
                self.grads[seq_length] = grads

    with tf.variable_scope("opt", reuse=None):
        if not forward_only:
            for seq_length in range(self.min_length, self.max_length + 1):
                self.optims[seq_length] = self.opt.apply_gradients(
                    zip(self.grads[seq_length], self.params),
                    global_step=self.global_step)

    self.saver = tf.train.Saver()
    print(" [*] Build a PTRModel math model finished")
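# --- Not part of the original source: a hypothetical training-step driver for the
# --- batched variant above. The dummy random data and session handling are
# --- assumptions; the attribute names (inputs_1, inputs_2, true_outputs, losses,
# --- optims) come from build_model.
def train_step(sess, model, seq_length, batch_size):
    feed = {}
    # The graph is unrolled per position, so feed one placeholder per step.
    for t in range(seq_length):
        feed[model.inputs_1[t]] = np.random.rand(batch_size, model.input_dim)
        feed[model.inputs_2[t]] = np.random.rand(batch_size, model.input_dim)
    # Targets cover seq_length * seq_length positions (a pairwise output grid).
    for t in range(seq_length * seq_length):
        feed[model.true_outputs[t]] = np.random.rand(batch_size, model.output_dim)
    loss, _ = sess.run([model.losses[seq_length], model.optims[seq_length]], feed)
    return loss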
def build_model(self, forward_only):
    print("[*] Building a PTRModel math model")
    with tf.variable_scope(self.scope):
        self.a = weight('a', [1, 1])
        # self.c = weight('c', [1, 1])
        # self.d = weight('d', [1, 1])
        self.b = weight('b', [1, 1], init='constant')
        self.beta = 1 + tf.nn.softplus(weight('beta', [1, 1]))

        prev_state = self.controller.init_state()
        tf.get_variable_scope().reuse_variables()

        for seq_length in range(1, self.max_length + 1):
            true_output = tf.placeholder(tf.float32, [self.output_dim],
                                         name='true_output_%s' % seq_length)
            self.true_outputs.append(true_output)

        for seq_length in range(1, self.max_length + 1):
            input_1 = tf.placeholder(tf.float32, [self.input_dim],
                                     name='input_1_%s' % seq_length)
            self.inputs_1.append(input_1)

            # present inputs
            prev_state = self.controller.update_memory(prev_state, [
                tf.reshape(input_1, [1, -1]),
                tf.zeros((1, self.W))
            ])
            self.collect_states[seq_length] = self.collect_states[
                seq_length - 1][0:(seq_length - 1)] + [self.copy_state(prev_state)]
            self.debug[seq_length] = []

            state = prev_state
            self.prev_states[seq_length] = state

            for j in range(seq_length):
                state, _ = self.controller(state, j)
                new_state = self.copy_state(state)
                self.collect_states[seq_length].append(new_state)
                self.debug[seq_length].append((new_state['ptr'], new_state['dptr']))

            self.outputs[seq_length] = tf.unstack(state['M'][-1][0:seq_length])

        if not forward_only:
            for seq_length in range(self.min_length, self.max_length):
                print(" [*] Building a loss model for seq_length %s" % seq_length)
                print(len(self.outputs[seq_length]),
                      len(self.true_outputs[0:seq_length]),
                      len([1] * seq_length))

                loss = sequence_loss(
                    logits=self.outputs[seq_length],
                    targets=self.true_outputs[0:seq_length],
                    weights=[1] * seq_length,
                    average_across_timesteps=False,
                    average_across_batch=False,
                    softmax_loss_function=l2_loss)
                self.losses[seq_length] = loss

                if not self.params:
                    self.params = tf.trainable_variables()

                grads = []
                for grad in tf.gradients(loss, self.params):  # + self.weight_decay*tf.add_n(tf.get_collection('l2'))
                    if grad is not None:
                        grads.append(tf.clip_by_value(grad, self.min_grad, self.max_grad))
                    else:
                        grads.append(grad)
                self.grads[seq_length] = grads

    with tf.variable_scope("opt", reuse=None):
        if not forward_only:
            for seq_length in range(self.min_length, self.max_length):
                self.optims[seq_length] = self.opt.apply_gradients(
                    zip(self.grads[seq_length], self.params),
                    global_step=self.global_step)

    self.saver = tf.train.Saver()
    print(" [*] Build a PTRModel math model finished")
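# --- Not part of the original source: a minimal sketch of the weight() helper used
# --- by the build_model variants, with behavior inferred from its call sites
# --- (named variable, optional 'xavier' or 'constant' initialization, optional
# --- value). The real helper may differ.
def weight(name, shape, init='xavier', value=0.0):
    if init == 'constant':
        initializer = tf.constant_initializer(value)
    else:
        initializer = tf.contrib.layers.xavier_initializer()
    return tf.get_variable(name, shape, dtype=tf.float32, initializer=initializer)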
def build_model(self, forward_only):
    print("[*] Building a PTRModel QA model")
    self.storys = tf.placeholder(tf.int32,
                                 [self.batch_size, self.story_size, self.input_dim],
                                 name='story')
    self.querys = tf.placeholder(tf.int32, [self.batch_size, self.input_dim], name='query')
    self.labels = tf.placeholder(tf.int32, [self.batch_size], name='label')

    self.embedding_matrix = weight('embedding', [self.output_dim, self.W], init='xavier')
    self.mask = tf.ones([self.input_dim, self.W])  #weight('mask', [self.input_dim, self.W], init='xavier')
    self.decoding_weight = weight('decoding_weight', [self.W, self.output_dim], init='xavier')
    self.decoding_bias = weight('decoding_bias', [self.output_dim], init='constant')
    zeros = np.zeros(self.W, dtype=np.float32)

    with tf.variable_scope(self.scope):
        init_state = self.controller.init_state()
        ss, qs = self.embedding(self.storys, self.querys)
        tf.get_variable_scope().reuse_variables()

        for i in range(self.batch_size):
            progress(i / float(self.batch_size))
            state = init_state

            for sid in range(self.story_size):
                input_ = ss[i, sid:sid + 1, :]
                state = self.controller.update_memory(state, [input_, input_])  # present inputs

            state['R'] = qs[i:i + 1, :]

            outputs = []
            # present targets
            for _ in range(self.max_hops):
                state = self.controller(state)
                outputs.append(self.decode(state['R']))

            #out = tf.reduce_sum(tf.concat(outputs, 0), 0, keep_dims=True)
            out = outputs[-1]
            self.outputs.append(out)

        if not forward_only:
            logits = tf.concat(self.outputs, 0)
            cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=logits, labels=self.labels)
            self.loss = tf.reduce_mean(cross_entropy)

            predicts = tf.cast(tf.argmax(logits, 1), 'int32')
            corrects = tf.equal(predicts, self.labels)
            self.num_corrects = tf.reduce_sum(tf.cast(corrects, tf.float32))

            if not self.params:
                self.params = tf.trainable_variables()

            self.grads = []
            for grad in tf.gradients(self.loss, self.params):
                if grad is not None:
                    self.grads.append(tf.clip_by_value(grad, self.min_grad, self.max_grad))
                else:
                    self.grads.append(grad)

    with tf.variable_scope("opt", reuse=None):
        if not forward_only:
            self.optim = self.opt.apply_gradients(
                zip(self.grads, self.params), global_step=self.global_step)

    self.saver = tf.train.Saver()
    print(" [*] Build a PTRModel QA model finished")
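# --- Not part of the original source: a hypothetical evaluation helper for the QA
# --- model above. Batching and id encoding are assumptions; the tensor names
# --- (storys, querys, labels, loss, num_corrects) come from build_model.
def eval_batch(sess, model, storys, querys, labels):
    feed = {
        model.storys: storys,   # [batch_size, story_size, input_dim] word ids
        model.querys: querys,   # [batch_size, input_dim] word ids
        model.labels: labels,   # [batch_size] answer ids
    }
    loss, num_corrects = sess.run([model.loss, model.num_corrects], feed)
    return loss, num_corrects / float(model.batch_size)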