Example #1
    def build_test_model(self):
        features   = net(self.images, mode=self.mode)
        features   = tf.reshape(features, [-1, self.L, self.D])
        print('CNN build model success!')

        alpha_list = []
        pred_bboxs = []
        lstm_cell  = tf.nn.rnn_cell.BasicLSTMCell(num_units=self.H)
        lstm_cell  = tf.contrib.rnn.DropoutWrapper(lstm_cell,\
                                                   input_keep_prob=self.drop_prob)
        # Get initial state of LSTM
        c, h = self._get_initial_lstm(features=features)
        # Loop for t steps
        for t in range(self.T):
            # Attention
            context, alpha = self._attention_layer(features, h, reuse=(t!=0))
            alpha_list.append(alpha)
            # LSTM step
            with tf.variable_scope('lstm', reuse=(t!=0)):
                _, (c, h) = lstm_cell(inputs=context, state=[c, h])
            # Prediction
            logits = self._prediction_layer(h, reuse=(t!=0))
            # Collect
            pred_bboxs.append(logits)

        return pred_bboxs, alpha_list
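
The `_attention_layer` helper this example calls is not shown on this page. Below is a minimal sketch of a Bahdanau-style soft-attention layer with the same interface (a context vector plus per-location weights); the function name `soft_attention` and the weight names are assumptions, not the source's actual implementation:

import tensorflow as tf

def soft_attention(features, h, reuse=False):
    # features: (N, L, D) flattened CNN features; h: (N, H) LSTM hidden state
    with tf.variable_scope('attention', reuse=reuse):
        D = features.get_shape().as_list()[-1]
        # Project the hidden state into feature space, broadcast over the L locations
        w_h = tf.layers.dense(h, D, use_bias=False, name='w_h')           # (N, D)
        act = tf.nn.tanh(features + tf.expand_dims(w_h, 1))               # (N, L, D)
        # One attention score per location, normalized to a distribution
        scores = tf.layers.dense(act, 1, use_bias=False, name='w_att')    # (N, L, 1)
        alpha = tf.nn.softmax(tf.squeeze(scores, axis=2))                 # (N, L)
        # Context vector: attention-weighted sum of the features
        context = tf.reduce_sum(features * tf.expand_dims(alpha, 2), 1)   # (N, D)
    return context, alpha

Called once per timestep with reuse=(t != 0), as in the loop above, the same weights are shared across all T steps.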
Example #2
    def build_test_model(self):
        features, layer_4 = net(self.images, mode=self.is_train)
        _, H1, W1, D1 = features.get_shape().as_list()
        _, H2, W2, D2 = layer_4.get_shape().as_list()
        features = tf.reshape(features, [-1, self.L, self.D])
        # STN
        stn_output = self._stn_layer(name_scope='Localization_STN',
                                     inputs=layer_4,
                                     reuse=False)
        stn_output = tf.nn.avg_pool(value=stn_output,
                                    ksize=[1, 3, 3, 1],
                                    strides=[1, 2, 2, 1],
                                    padding='SAME')
        _, h1, w1, d1 = stn_output.get_shape().as_list()
        stn_output = tf.reshape(stn_output, [-1, h1 * w1, d1])  # d1 == D2 for the STN output
        print(stn_output.get_shape())
        print('CNN build model success!')

        batch_size = tf.shape(features)[0]
        alpha_list = []
        pred_bboxs = []
        pred_cptha = []
        lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(num_units=self.H)
        lstm_cell  = tf.contrib.rnn.DropoutWrapper(lstm_cell,\
                                                   input_keep_prob=self.drop_prob)
        # Get initial state of LSTM
        c, h = self._get_initial_lstm(features=features)
        # Loop for t steps
        for t in range(self.T):
            # Attend to final features
            context, out_ft, alpha = self._attention_layer(features,
                                                           h,
                                                           reuse=(t != 0))
            # Attend to STN features
            stn_output_attn = stn_output * tf.expand_dims(alpha, 2)
            # Collect masks
            alpha_list.append(alpha)
            # LSTM step
            with tf.variable_scope('lstm', reuse=(t != 0)):
                _, (c, h) = lstm_cell(inputs=context, state=[c, h])
            # BBox Prediction
            bbox_pred = self._prediction_layer(name_scope='bbox_pred_layer',\
                                               inputs=h, outputs=4, H=self.H, reuse=(t!=0))
            # CAPTCHA prediction
            interm_captcha_pred = self._interm_prediction_layer(name_scope='interm_captcha_pred',\
                                                         inputs=out_ft, stn_inputs=stn_output_attn,\
                                                         outputs=512, H=[D1, D2], reuse=(t!=0))
            interm_captcha_pred = tf.nn.dropout(interm_captcha_pred,
                                                keep_prob=self.drop_prob)
            captcha_pred = self._prediction_layer(name_scope='captcha_pred_layer',\
                                                  inputs=interm_captcha_pred, outputs=64, H=512, reuse=(t!=0))
            # Collects
            pred_bboxs.append(bbox_pred)
            pred_cptha.append(captcha_pred)

        return pred_bboxs, pred_cptha, alpha_list
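
Both variants initialize the LSTM from the CNN features via `_get_initial_lstm`, which is also not shown. A plausible minimal sketch, assuming the common pattern of mean-pooling the features and mapping them through two dense layers (the name `init_lstm_state` and the layer names are hypothetical):

def init_lstm_state(features, H, reuse=False):
    # features: (N, L, D); returns the initial memory c and hidden state h, each (N, H)
    with tf.variable_scope('initial_lstm', reuse=reuse):
        mean_feat = tf.reduce_mean(features, axis=1)  # (N, D)
        c = tf.layers.dense(mean_feat, H, activation=tf.nn.tanh, name='init_c')
        h = tf.layers.dense(mean_feat, H, activation=tf.nn.tanh, name='init_h')
    return c, h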
Example #3
    def build_model(self):
        features   = net(self.images, mode=self.mode)
        features   = tf.reshape(features, [-1, self.L, self.D])
        print('CNN build model success!')

        bboxes     = self.bboxes
        gnd_attn   = self.gnd_attn
        batch_size = tf.shape(features)[0]
        final_loss = 0.0
        alpha_list = []
        lstm_cell  = tf.nn.rnn_cell.BasicLSTMCell(num_units=self.H)
        lstm_cell  = tf.contrib.rnn.DropoutWrapper(lstm_cell,\
                                                   input_keep_prob=self.drop_prob)
        # Get initial state of LSTM
        c, h = self._get_initial_lstm(features=features)
        # Loop for t steps
        for t in range(self.T):
            # Attention
            context, alpha = self._attention_layer(features, h, reuse=(t!=0))
            alpha_list.append(alpha)
            # LSTM step
            with tf.variable_scope('lstm', reuse=(t!=0)):
                _, (c, h) = lstm_cell(inputs=context, state=[c, h])
            # Prediction
            logits = self._prediction_layer(h, reuse=(t!=0))
            # Loss at each time step
            interm_loss = self._mean_squared_error(grd_bboxes=self.bboxes[:, t, :], pred_bboxes=logits)
            # Collect loss
            final_loss += tf.reduce_sum(interm_loss)

        if self.alpha_c > 0:
            ## KL-loss
            alpha_loss = 0.0
            for T in range(self.T):
                pred_alpha = alpha_list[T] # (N, L)
                grnd_alpha = self.gnd_attn[:, T, :] # (N, L)
                error_alpha = grnd_alpha * tf.log(grnd_alpha/(pred_alpha + 0.0001) + 1e-8)  # Avoid NaN
                alpha_loss += tf.reduce_sum(error_alpha) # (1)
            # Weight alpha loss
            alpha_reg = self.alpha_c * alpha_loss
            # Add alpha loss to the final loss
            final_loss += alpha_reg
        else:
            print('No Attention Regularization!')

        if self.l2 > 0:
            print('L2 regularization:')
            for var in tf.trainable_variables():
                tf_var = var.name
                # Skip bias variables (their names end in 'biases:0' or 'bias:0')
                if tf_var[-8:-2] != 'biases' and tf_var[-6:-2] != 'bias':
                    print(tf_var)
                    final_loss = final_loss + (self.l2 * tf.nn.l2_loss(var))
        print('...............................................................')

        return final_loss/tf.to_float(batch_size)
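
The per-step bbox loss relies on a `_mean_squared_error` helper that is not shown here. A minimal sketch, assuming it sums squared coordinate errors per example (the standalone name `mean_squared_error` is hypothetical):

def mean_squared_error(grd_bboxes, pred_bboxes):
    # grd_bboxes, pred_bboxes: (N, 4); returns the per-example squared error, shape (N,)
    return tf.reduce_sum(tf.square(grd_bboxes - pred_bboxes), axis=1)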
Example #4
def main():

    classifier1 = net().float()
    optimizer1 = optim.Adam(classifier1.parameters(), lr=1e-4)
    classifier2 = net2().float()
    optimizer2 = optim.Adam(classifier2.parameters(), lr=1e-4)
    dataset = dwellingsDataset()
    data = DataLoader(dataset, batch_size=100, shuffle=True, num_workers=4)
    epochs = 100
    results_path = '../../models/'

    # Results files:
    loss_file1 = results_path + 'kampala_classifier_losses_net1.csv'
    accuracy_file1 = results_path + 'kampala_classifier_accuracy_net1.csv'
    net_file1 = results_path + 'kampala_classifier_net1.pkl'
    loss_file2 = results_path + 'kampala_classifier_losses_net2.csv'
    accuracy_file2 = results_path + 'kampala_classifier_accuracy_net2.csv'
    net_file2 = results_path + 'kampala_classifier_net2.pkl'

    # Creating new csv files with results of classifier:
    with open(loss_file1, 'w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(['Epoch', 'Iteration', 'Loss'])
    with open(accuracy_file1, 'w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(['Epoch', 'Accuracy'])
    with open(loss_file2, 'w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(['Epoch', 'Iteration', 'Loss'])
    with open(accuracy_file2, 'w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(['Epoch', 'Accuracy'])

    # Training nets
    for epoch in range(epochs):
        train(data, classifier1, optimizer1, epoch, loss_file1, accuracy_file1,
              net_file1)
        train(data, classifier2, optimizer2, epoch, loss_file2, accuracy_file2,
              net_file2)
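
The `train()` helper this example calls is not shown on the page. Below is a minimal sketch under the assumption of a standard cross-entropy classification loop; the csv column layout follows the headers written above, while the batch unpacking, loss choice, and saving format are assumptions:

import csv
import torch
import torch.nn.functional as F

def train(data, classifier, optimizer, epoch, loss_file, accuracy_file, net_file):
    classifier.train()
    correct, total = 0, 0
    for i, (inputs, labels) in enumerate(data):
        optimizer.zero_grad()
        outputs = classifier(inputs.float())
        loss = F.cross_entropy(outputs, labels)
        loss.backward()
        optimizer.step()
        correct += (outputs.argmax(dim=1) == labels).sum().item()
        total += labels.size(0)
        # Append one row per iteration: Epoch, Iteration, Loss
        with open(loss_file, 'a', newline='') as f:
            csv.writer(f).writerow([epoch, i, loss.item()])
    # Append one row per epoch: Epoch, Accuracy
    with open(accuracy_file, 'a', newline='') as f:
        csv.writer(f).writerow([epoch, correct / total])
    torch.save(classifier.state_dict(), net_file)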
Example #5
    def build_model(self):
        features, layer_4 = net(self.images, mode=self.is_train)
        _, H1, W1, D1 = features.get_shape().as_list()
        _, H2, W2, D2 = layer_4.get_shape().as_list()
        features = tf.reshape(features, [-1, self.L, self.D])
        # STN
        stn_output = self._stn_layer(name_scope='Localization_STN',
                                     inputs=layer_4,
                                     reuse=False)
        stn_output = tf.nn.avg_pool(value=stn_output,
                                    ksize=[1, 3, 3, 1],
                                    strides=[1, 2, 2, 1],
                                    padding='SAME')
        _, h1, w1, d1 = stn_output.get_shape().as_list()
        stn_output = tf.reshape(stn_output, [-1, h1 * w1, d1])  # d1 == D2 for the STN output
        print(stn_output.get_shape())
        print('CNN build model success!')

        batch_size = tf.shape(features)[0]
        final_loss = 0.0
        alpha_list = []
        pred_bboxs = []
        pred_cptha = []
        lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(num_units=self.H)
        lstm_cell  = tf.contrib.rnn.DropoutWrapper(lstm_cell,\
                                                   input_keep_prob=self.drop_prob)
        # Get initial state of LSTM
        c, h = self._get_initial_lstm(features=features)
        # Loop for t steps
        for t in range(self.T):
            # Attend to final features
            context, out_ft, alpha = self._attention_layer(features,
                                                           h,
                                                           reuse=(t != 0))
            # Attend to STN features
            stn_output_attn = stn_output * tf.expand_dims(alpha, 2)
            # Collect masks
            alpha_list.append(alpha)
            # LSTM step
            with tf.variable_scope('lstm', reuse=(t != 0)):
                _, (c, h) = lstm_cell(inputs=context, state=[c, h])
            # BBox Prediction
            bbox_pred = self._prediction_layer(name_scope='bbox_pred_layer',\
                                               inputs=h, outputs=4, H=self.H, reuse=(t!=0))
            # CAPTCHA prediction
            interm_captcha_pred = self._interm_prediction_layer(name_scope='interm_captcha_pred',\
                                                         inputs=out_ft, stn_inputs=stn_output_attn,\
                                                         outputs=512, H=[D1, D2], reuse=(t!=0))
            interm_captcha_pred = tf.nn.dropout(interm_captcha_pred,
                                                keep_prob=self.drop_prob)
            captcha_pred = self._prediction_layer(name_scope='captcha_pred_layer',\
                                                  inputs=interm_captcha_pred, outputs=64, H=512, reuse=(t!=0))
            # Collects
            pred_bboxs.append(bbox_pred)
            pred_cptha.append(captcha_pred)
            # Loss
            interm_loss_digit = self._softmax_cross_entropy(
                labels=self.labels[:, t, :], logits=captcha_pred)
            interm_loss_bbox = self._mean_squared_error(
                grd_bboxes=self.bboxes[:, t, :], pred_bboxes=bbox_pred)
            # Collect loss
            final_loss += tf.reduce_sum(interm_loss_digit) + tf.reduce_sum(
                interm_loss_bbox)

        if self.alpha_c > 0:
            ## KL-loss
            alpha_loss = 0.0
            for T in range(self.T):
                pred_alpha = alpha_list[T]  # (N, L)
                grnd_alpha = self.gnd_attn[:, T, :]  # (N, L)
                error_alpha = grnd_alpha * tf.log(
                    grnd_alpha / (pred_alpha + 0.0001) + 1e-8)  # Avoid NaN
                alpha_loss += tf.reduce_sum(error_alpha)  # (1)
            # Weight alpha loss
            alpha_reg = self.alpha_c * alpha_loss
            # Add alpha loss to the final loss
            final_loss += alpha_reg
        else:
            print('No Attention Regularization!')

        if self.l2 > 0:
            print('L2 regularization:')
            for var in tf.trainable_variables():
                tf_var = var.name
                # Skip bias variables (their names end in 'biases:0' or 'bias:0')
                if tf_var[-8:-2] != 'biases' and tf_var[-6:-2] != 'bias':
                    print(tf_var)
                    final_loss = final_loss + (self.l2 * tf.nn.l2_loss(var))

        return final_loss / tf.to_float(batch_size), pred_bboxs, pred_cptha
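
The `_softmax_cross_entropy` helper used for the per-digit loss is presumably a thin wrapper around the standard TF1 op over one-hot labels; a minimal sketch (the standalone name is hypothetical):

def softmax_cross_entropy(labels, logits):
    # labels: one-hot (N, C); logits: (N, C); returns the per-example loss, shape (N,)
    return tf.nn.softmax_cross_entropy_with_logits_v2(labels=labels, logits=logits)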