def build_test_model(self):
    features = net(self.images, mode=self.mode)
    features = tf.reshape(features, [-1, self.L, self.D])
    print('CNN build model success!')

    alpha_list = []
    pred_bboxs = []

    lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(num_units=self.H)
    lstm_cell = tf.contrib.rnn.DropoutWrapper(lstm_cell,
                                              input_keep_prob=self.drop_prob)

    # Get initial state of LSTM
    c, h = self._get_initial_lstm(features=features)

    # Loop for T steps
    for t in range(self.T):
        # Attention
        context, alpha = self._attention_layer(features, h, reuse=(t != 0))
        alpha_list.append(alpha)

        # LSTM step
        with tf.variable_scope('lstm', reuse=(t != 0)):
            _, (c, h) = lstm_cell(inputs=context, state=[c, h])

        # Prediction
        logits = self._prediction_layer(h, reuse=(t != 0))

        # Collect
        pred_bboxs.append(logits)

    return pred_bboxs, alpha_list
def build_test_model(self):
    features, layer_4 = net(self.images, mode=self.is_train)
    _, H1, W1, D1 = features.get_shape().as_list()
    _, H2, W2, D2 = layer_4.get_shape().as_list()
    features = tf.reshape(features, [-1, self.L, self.D])

    # STN
    stn_output = self._stn_layer(name_scope='Localization_STN',
                                 inputs=layer_4, reuse=False)
    stn_output = tf.nn.avg_pool(value=stn_output, ksize=[1, 3, 3, 1],
                                strides=[1, 2, 2, 1], padding='SAME')
    _, h1, w1, d1 = stn_output.get_shape().as_list()
    # Flatten the pooled STN feature map to (N, h1 * w1, D2)
    stn_output = tf.reshape(stn_output, [-1, h1 * w1, D2])
    print(stn_output.get_shape())
    print('CNN build model success!')

    batch_size = tf.shape(features)[0]
    alpha_list = []
    pred_bboxs = []
    pred_cptha = []

    lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(num_units=self.H)
    lstm_cell = tf.contrib.rnn.DropoutWrapper(lstm_cell,
                                              input_keep_prob=self.drop_prob)

    # Get initial state of LSTM
    c, h = self._get_initial_lstm(features=features)

    # Loop for T steps
    for t in range(self.T):
        # Attend to final features
        context, out_ft, alpha = self._attention_layer(features, h, reuse=(t != 0))

        # Attend to STN features: broadcast the (N, L) mask over the depth axis
        stn_output_attn = stn_output * tf.expand_dims(alpha, 2)

        # Collect masks
        alpha_list.append(alpha)

        # LSTM step
        with tf.variable_scope('lstm', reuse=(t != 0)):
            _, (c, h) = lstm_cell(inputs=context, state=[c, h])

        # BBox prediction
        bbox_pred = self._prediction_layer(name_scope='bbox_pred_layer',
                                           inputs=h, outputs=4, H=self.H,
                                           reuse=(t != 0))

        # CAPTCHA prediction
        interm_captcha_pred = self._interm_prediction_layer(
            name_scope='interm_captcha_pred', inputs=out_ft,
            stn_inputs=stn_output_attn, outputs=512, H=[D1, D2],
            reuse=(t != 0))
        interm_captcha_pred = tf.nn.dropout(interm_captcha_pred,
                                            keep_prob=self.drop_prob)
        captcha_pred = self._prediction_layer(name_scope='captcha_pred_layer',
                                              inputs=interm_captcha_pred,
                                              outputs=64, H=512,
                                              reuse=(t != 0))

        # Collect
        pred_bboxs.append(bbox_pred)
        pred_cptha.append(captcha_pred)

    return pred_bboxs, pred_cptha, alpha_list
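# Usage sketch for the test graph above -- a minimal example, assuming a
# wrapper class (here called `Model`) that exposes the `images` placeholder,
# a feedable `drop_prob`, and this method; `ckpt_path` and `batch_images`
# stand in for your own checkpoint and data. None of these names are
# confirmed by this file.
model = Model(mode='test')  # hypothetical constructor
pred_bboxs, pred_cptha, alpha_list = model.build_test_model()

with tf.Session() as sess:
    tf.train.Saver().restore(sess, ckpt_path)
    bboxes, cptha = sess.run([pred_bboxs, pred_cptha],
                             feed_dict={model.images: batch_images,
                                        model.drop_prob: 1.0})  # no dropout at test time
    # Each element of `cptha` is an (N, 64) logits array; argmax over the
    # last axis gives the predicted symbol id at that time step.
    symbols = [step_logits.argmax(axis=1) for step_logits in cptha]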
def build_model(self):
    features = net(self.images, mode=self.mode)
    features = tf.reshape(features, [-1, self.L, self.D])
    print('CNN build model success!')

    batch_size = tf.shape(features)[0]
    final_loss = 0.0
    alpha_list = []

    lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(num_units=self.H)
    lstm_cell = tf.contrib.rnn.DropoutWrapper(lstm_cell,
                                              input_keep_prob=self.drop_prob)

    # Get initial state of LSTM
    c, h = self._get_initial_lstm(features=features)

    # Loop for T steps
    for t in range(self.T):
        # Attention
        context, alpha = self._attention_layer(features, h, reuse=(t != 0))
        alpha_list.append(alpha)

        # LSTM step
        with tf.variable_scope('lstm', reuse=(t != 0)):
            _, (c, h) = lstm_cell(inputs=context, state=[c, h])

        # Prediction
        logits = self._prediction_layer(h, reuse=(t != 0))

        # Loss at each time step
        interm_loss = self._mean_squared_error(grd_bboxes=self.bboxes[:, t, :],
                                               pred_bboxes=logits)

        # Collect loss
        final_loss += tf.reduce_sum(interm_loss)

    if self.alpha_c > 0:
        # KL-divergence loss between ground-truth and predicted attention
        alpha_loss = 0.0
        for T in range(self.T):
            pred_alpha = alpha_list[T]           # (N, L)
            grnd_alpha = self.gnd_attn[:, T, :]  # (N, L)
            error_alpha = grnd_alpha * tf.log(grnd_alpha / (pred_alpha + 0.0001)
                                              + 1e-8)  # Avoid NaN
            alpha_loss += tf.reduce_sum(error_alpha)   # (1)

        # Weight alpha loss and add it to the final loss
        alpha_reg = self.alpha_c * alpha_loss
        final_loss += alpha_reg
    else:
        print('No Attention Regularization!')

    if self.l2 > 0:
        print('L2 regularization:')
        # Apply weight decay to all trainable variables except biases
        for var in tf.trainable_variables():
            tf_var = var.name
            if tf_var[-8:-2] != 'biases' and tf_var[-6:-2] != 'bias':
                print(tf_var)
                final_loss = final_loss + (self.l2 * tf.nn.l2_loss(var))
        print('...............................................................')

    return final_loss / tf.to_float(batch_size)
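# The attention regularizer above is a per-step KL divergence between the
# ground-truth and predicted attention maps. A minimal standalone sketch of
# the same term, with the stabilizing constants copied from build_model
# (`gnd` and `pred` stand in for gnd_attn[:, t, :] and alpha, both (N, L)):
def kl_attention_loss(gnd, pred):
    # KL(gnd || pred) = sum_i gnd_i * log(gnd_i / pred_i), summed over
    # locations and the batch; 1e-4 and 1e-8 guard against division by
    # zero and log(0).
    return tf.reduce_sum(gnd * tf.log(gnd / (pred + 0.0001) + 1e-8))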
import csv

import torch.optim as optim
from torch.utils.data import DataLoader

# `net`, `net2`, `dwellingsDataset`, and `train` come from project modules.

def main():
    classifier1 = net().float()
    optimizer1 = optim.Adam(classifier1.parameters(), lr=1e-4)
    classifier2 = net2().float()
    optimizer2 = optim.Adam(classifier2.parameters(), lr=1e-4)

    dataset = dwellingsDataset()
    data = DataLoader(dataset, batch_size=100, shuffle=True, num_workers=4)

    epochs = 100
    results_path = '../../models/'

    # Results files:
    loss_file1 = results_path + 'kampala_classifier_losses_net1.csv'
    accuracy_file1 = results_path + 'kampala_classifier_accuracy_net1.csv'
    net_file1 = results_path + 'kampala_classifier_net1.pkl'
    loss_file2 = results_path + 'kampala_classifier_losses_net2.csv'
    accuracy_file2 = results_path + 'kampala_classifier_accuracy_net2.csv'
    net_file2 = results_path + 'kampala_classifier_net2.pkl'

    # Create fresh CSV files for the classifier results:
    with open(loss_file1, 'w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(['Epoch', 'Iteration', 'Loss'])
    with open(accuracy_file1, 'w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(['Epoch', 'Accuracy'])
    with open(loss_file2, 'w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(['Epoch', 'Iteration', 'Loss'])
    with open(accuracy_file2, 'w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(['Epoch', 'Accuracy'])

    # Train both nets
    for epoch in range(epochs):
        train(data, classifier1, optimizer1, epoch, loss_file1, accuracy_file1, net_file1)
        train(data, classifier2, optimizer2, epoch, loss_file2, accuracy_file2, net_file2)
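# The `train` helper called above is not shown in this file. A minimal,
# entirely hypothetical sketch of a signature-compatible implementation --
# the real one may compute accuracy on a held-out set, log differently, or
# save on another schedule:
import torch
import torch.nn.functional as F

def train(data, classifier, optimizer, epoch, loss_file, accuracy_file, net_file):
    classifier.train()
    correct, total = 0, 0
    for i, (inputs, targets) in enumerate(data):
        optimizer.zero_grad()
        outputs = classifier(inputs.float())
        loss = F.cross_entropy(outputs, targets)
        loss.backward()
        optimizer.step()
        correct += (outputs.argmax(dim=1) == targets).sum().item()
        total += targets.size(0)
        # Append per-iteration loss to the CSV created in main()
        with open(loss_file, 'a', newline='') as f:
            csv.writer(f).writerow([epoch, i, loss.item()])
    # Append per-epoch training accuracy
    with open(accuracy_file, 'a', newline='') as f:
        csv.writer(f).writerow([epoch, correct / total])
    torch.save(classifier.state_dict(), net_file)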
def build_model(self):
    features, layer_4 = net(self.images, mode=self.is_train)
    _, H1, W1, D1 = features.get_shape().as_list()
    _, H2, W2, D2 = layer_4.get_shape().as_list()
    features = tf.reshape(features, [-1, self.L, self.D])

    # STN
    stn_output = self._stn_layer(name_scope='Localization_STN',
                                 inputs=layer_4, reuse=False)
    stn_output = tf.nn.avg_pool(value=stn_output, ksize=[1, 3, 3, 1],
                                strides=[1, 2, 2, 1], padding='SAME')
    _, h1, w1, d1 = stn_output.get_shape().as_list()
    # Flatten the pooled STN feature map to (N, h1 * w1, D2)
    stn_output = tf.reshape(stn_output, [-1, h1 * w1, D2])
    print(stn_output.get_shape())
    print('CNN build model success!')

    batch_size = tf.shape(features)[0]
    final_loss = 0.0
    alpha_list = []
    pred_bboxs = []
    pred_cptha = []

    lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(num_units=self.H)
    lstm_cell = tf.contrib.rnn.DropoutWrapper(lstm_cell,
                                              input_keep_prob=self.drop_prob)

    # Get initial state of LSTM
    c, h = self._get_initial_lstm(features=features)

    # Loop for T steps
    for t in range(self.T):
        # Attend to final features
        context, out_ft, alpha = self._attention_layer(features, h, reuse=(t != 0))

        # Attend to STN features: broadcast the (N, L) mask over the depth axis
        stn_output_attn = stn_output * tf.expand_dims(alpha, 2)

        # Collect masks
        alpha_list.append(alpha)

        # LSTM step
        with tf.variable_scope('lstm', reuse=(t != 0)):
            _, (c, h) = lstm_cell(inputs=context, state=[c, h])

        # BBox prediction
        bbox_pred = self._prediction_layer(name_scope='bbox_pred_layer',
                                           inputs=h, outputs=4, H=self.H,
                                           reuse=(t != 0))

        # CAPTCHA prediction
        interm_captcha_pred = self._interm_prediction_layer(
            name_scope='interm_captcha_pred', inputs=out_ft,
            stn_inputs=stn_output_attn, outputs=512, H=[D1, D2],
            reuse=(t != 0))
        interm_captcha_pred = tf.nn.dropout(interm_captcha_pred,
                                            keep_prob=self.drop_prob)
        captcha_pred = self._prediction_layer(name_scope='captcha_pred_layer',
                                              inputs=interm_captcha_pred,
                                              outputs=64, H=512,
                                              reuse=(t != 0))

        # Collect
        pred_bboxs.append(bbox_pred)
        pred_cptha.append(captcha_pred)

        # Loss at each time step: digit classification + bbox regression
        interm_loss_digit = self._softmax_cross_entropy(labels=self.labels[:, t, :],
                                                        logits=captcha_pred)
        interm_loss_bbox = self._mean_squared_error(grd_bboxes=self.bboxes[:, t, :],
                                                    pred_bboxes=bbox_pred)

        # Collect loss
        final_loss += tf.reduce_sum(interm_loss_digit) + tf.reduce_sum(interm_loss_bbox)

    if self.alpha_c > 0:
        # KL-divergence loss between ground-truth and predicted attention
        alpha_loss = 0.0
        for T in range(self.T):
            pred_alpha = alpha_list[T]           # (N, L)
            grnd_alpha = self.gnd_attn[:, T, :]  # (N, L)
            error_alpha = grnd_alpha * tf.log(grnd_alpha / (pred_alpha + 0.0001)
                                              + 1e-8)  # Avoid NaN
            alpha_loss += tf.reduce_sum(error_alpha)   # (1)

        # Weight alpha loss and add it to the final loss
        alpha_reg = self.alpha_c * alpha_loss
        final_loss += alpha_reg
    else:
        print('No Attention Regularization!')

    if self.l2 > 0:
        print('L2 regularization:')
        # Apply weight decay to all trainable variables except biases
        for var in tf.trainable_variables():
            tf_var = var.name
            if tf_var[-8:-2] != 'biases' and tf_var[-6:-2] != 'bias':
                print(tf_var)
                final_loss = final_loss + (self.l2 * tf.nn.l2_loss(var))

    return final_loss / tf.to_float(batch_size), pred_bboxs, pred_cptha
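# Training-side usage sketch for the graph above (the optimizer choice and
# learning rate are assumptions, not taken from this file; `model` is the
# same hypothetical wrapper as in the test sketch):
loss, pred_bboxs, pred_cptha = model.build_model()
train_op = tf.train.AdamOptimizer(learning_rate=1e-4).minimize(loss)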