Example #1
import tensorflow as tf  # TensorFlow 1.x API (tf.contrib is used below)


# AlexNet, alexnet_OLD, and fc_layer are helpers defined elsewhere in this project.
class AlexNetController:
    def __init__(self, rnn_size, encoding_size, image_size=128, args=None):
        #  self.lstm = tf.nn.rnn_cell.BasicLSTMCell(rnn_size)
        self.lstm = tf.contrib.rnn.BasicLSTMCell(rnn_size)
        self.args = args
        # Load AlexNet, using pretrained weights if requested
        self.use_pretrained = args.use_pretrained
        if self.use_pretrained:
            self.alexnet = AlexNet()
            self.alexnet.load_weights()
        else:
            self.alexnet = alexnet_OLD.AlexNet()
        self.encoding_size = encoding_size
        self.image_size = image_size

    def __call__(self,
                 img_inp,
                 shifted_label,
                 vector_inp,
                 state,
                 scope='AlexNetController'):
        # Q: does the img_inp need to be of 224x224?
        # Ensure the input is in image form: reshape to the expected image size
        img_inp = tf.cast(img_inp, tf.float32)
        if self.args.dataset_type == 'omniglot':
            img_inp = tf.reshape(img_inp,
                                 [-1, self.image_size, self.image_size])
            #  img_inp = tf.stack([img_inp]*3, axis=-1)
            img_inp = tf.expand_dims(img_inp, axis=-1)
        vector_inp = tf.cast(vector_inp, tf.float32)
        net = self.alexnet.feed_forward(img_inp, architecture='encoding')
        net['flattened'] = tf.contrib.layers.flatten(net['output'])
        fc = {}
        with tf.variable_scope(scope):
            # If a casting issue comes up, make sure the architecture is right
            fc['fc1'] = fc_layer(net['flattened'], 256)
            fc['fc2'] = fc_layer(fc['fc1'], 64)
            fc['fc3'] = fc_layer(fc['fc2'], self.encoding_size)
            fc_output = fc['fc3']
        lstm_input = tf.concat([fc_output, shifted_label], axis=1)
        # unstack vector_inp along its first dimension so each slice can be concatenated
        vector_inp = [
            vector_inp[i, :, :] for i in range(vector_inp.get_shape()[0])
        ]
        lstm_input = tf.concat([lstm_input] + vector_inp, axis=1)
        return self.lstm(lstm_input, state)

    def zero_state(self, batch_size, dtype):
        return self.lstm.zero_state(batch_size, dtype)
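
A minimal sketch of how this controller might be wired up for a single time step, assuming TensorFlow 1.x and that the project's AlexNet/fc_layer helpers are importable; the Args namespace, placeholder shapes, and sizes below are illustrative assumptions, not values taken from the original project.

# Hypothetical driver for one controller step (TF 1.x); all shapes are assumptions.
import tensorflow as tf
from argparse import Namespace

args = Namespace(use_pretrained=False, dataset_type='omniglot')
controller = AlexNetController(rnn_size=200, encoding_size=32,
                               image_size=128, args=args)

batch_size = 16
img = tf.placeholder(tf.float32, [batch_size, 128, 128])        # one image per example
shifted_label = tf.placeholder(tf.float32, [batch_size, 5])     # previous-step label
vector_inp = tf.placeholder(tf.float32, [3, batch_size, 8])     # extra per-step vectors

state = controller.zero_state(batch_size, tf.float32)
output, state = controller(img, shifted_label, vector_inp, state)
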
Example #2
    def create_compute_graph(self):
        self.inputs = tf.placeholder(tf.float32, shape=(None, 32, 32, 3))
        self.raw_labels = tf.placeholder(tf.int64, shape=(None,))
        self.labels = tf.one_hot(self.raw_labels, self.to_num_classes)
        self.src_noise_levels = tf.placeholder(tf.float32,
                                               shape=(self.L + 1,))
        self.target_noise_levels = tf.placeholder(tf.float32,
                                                  shape=(self.L + 1,))

        if self.from_arch == 'ladder' and self.to_arch == 'alex':
            self.load_ladder_weights()
            alexnet_class = AlexNet()
            alexnet = alexnet_class.feed_forward(self.inputs)

            with tf.variable_scope('progressive_net'):
                alexnet['pool_3'] = tf.contrib.layers.flatten(
                    alexnet['pool_3'])

                # TODO(dbthaker): Change activation fn
                alexnet['fc_1'] = fc_layer(alexnet['pool_3'], 250)
                alexnet['fc_1'] = tf.layers.batch_normalization(
                    alexnet['fc_1'])
                # Note: tf.layers.dropout defaults to training=False, so this call
                # (and the fc_2 dropout below) is a no-op unless training=True is passed.
                alexnet['fc_1'] = tf.layers.dropout(alexnet['fc_1'], 0.5)

                ladder1 = tf.matmul(alexnet['pool_3'],
                                    self.weights['W'][self.L - 3])
                ladder1 = tf.layers.batch_normalization(ladder1,
                                                        training=False)
                ladder1 = tf.nn.relu(ladder1 +
                                     self.weights['beta'][self.L - 3])

                first_out = fc_layer(alexnet['fc_1'], 250)
                second_out = fc_layer(ladder1, 250)
                alexnet['fc_2'] = first_out + second_out
                alexnet['fc_2'] = tf.layers.batch_normalization(
                    alexnet['fc_2'])
                alexnet['fc_2'] = tf.layers.dropout(alexnet['fc_2'], 0.5)
                ladder2 = tf.matmul(ladder1, self.weights['W'][self.L - 2])
                ladder2 = tf.layers.batch_normalization(ladder2,
                                                        training=False)
                ladder2 = tf.nn.relu(ladder2 +
                                     self.weights['beta'][self.L - 2])

                first_out = fc_layer(alexnet['fc_2'],
                                     self.to_num_classes,
                                     activation_fn=None)
                second_out = fc_layer(ladder2,
                                      self.to_num_classes,
                                      activation_fn=None)
                alexnet['output'] = first_out + second_out
                alexnet['predicted'] = tf.cast(tf.argmax(\
                    tf.nn.softmax(alexnet['output']), axis=-1), tf.int64)
            net = alexnet
        elif (self.from_arch == 'ladder' or self.from_arch == 'baseline') \
                and self.to_arch == 'fc':
            self.gs_inputs = tf.image.rgb_to_grayscale(self.inputs)
            self.res_inputs = tf.image.resize_images(self.gs_inputs, (28, 28))
            self.load_ladder_weights(self.from_arch)
            from_net = {}
            to_net = {}
            from_net['fc0'] = tf.contrib.layers.flatten(self.res_inputs)
            from_net['fc0'] = gaussian_noise_layer(from_net['fc0'],
                                                   self.src_noise_levels[0])
            to_net['fc0'] = tf.contrib.layers.flatten(self.res_inputs)
            to_net['fc0'] = gaussian_noise_layer(to_net['fc0'],
                                                 self.target_noise_levels[0])

            for l in range(1, self.L + 2):
                prev = 'fc{}'.format(l - 1)
                curr = 'fc{}'.format(l)

                if l != self.L + 1:
                    from_net[curr] = tf.matmul(from_net[prev],
                                               self.weights['W'][l - 1])
                    from_net[curr] = tf.layers.batch_normalization(
                        from_net[curr], training=False)
                    from_net[curr] = tf.nn.relu(from_net[curr] +
                                                self.weights['beta'][l - 1])
                    from_net[curr] = gaussian_noise_layer(
                        from_net[curr], self.src_noise_levels[l - 1])

                first_out = fc_layer(to_net[prev], self.layer_sizes[l - 1], \
                    scope="first_fc{}".format(l), activation_fn=tf.nn.relu)
                #scale = tf.Variable(tf.random_normal([1], stddev=0.5))
                #first_out = scale * first_out
                print("Ladder {} -> Ladder {}".format(prev, curr))
                if l != 1:
                    second_out = fc_layer(from_net[prev], self.layer_sizes[l - 1], \
                        scope="second_fc{}".format(l), activation_fn=tf.nn.relu)
                    to_net[curr] = first_out + second_out
                    to_net[curr] = gaussian_noise_layer(to_net[curr], \
                        self.target_noise_levels[l - 1])
                    print("Ladder {} -> New {} {} + New {} -> New {} {}".format(\
                        prev, curr, self.layer_sizes[l - 1], \
                        prev, curr, self.layer_sizes[l - 1]))
                else:
                    to_net[curr] = first_out
                    to_net[curr] = gaussian_noise_layer(to_net[curr], \
                        self.target_noise_levels[l - 1])
                    print("New {} -> New {} {}".format(\
                        prev, curr, self.layer_sizes[l - 1]))
            to_net['output'] = to_net['fc{}'.format(self.L + 1)]
            to_net['predicted'] = tf.cast(tf.argmax(tf.nn.softmax( \
                    to_net['output']), axis=-1), tf.int64)
            net = to_net
            # Both source architectures currently use the same learning-rate schedule.
            bounds = [70000]
            values = [1e-4, 1e-5]
            self.step_op = tf.Variable(0, name='step', trainable=False)
            self.lr = tf.train.piecewise_constant(self.step_op, bounds, values)
        elif (self.from_arch == 'ladder' or self.from_arch == 'baseline') \
                and self.to_arch == 'pre_fc':
            self.load_ladder_weights(self.from_arch, trainable=True)
            net = self.fc_decoder()
            bounds = [70000]
            values = [1e-4, 1e-5]
            self.step_op = tf.Variable(0, name='step', trainable=False)
            self.lr = tf.train.piecewise_constant(self.step_op, bounds, values)
        elif self.from_arch == "None" and self.to_arch == 'fc':
            fc_decoder = self.fc_decoder()
            net = fc_decoder
            bounds = [70000]
            values = [1e-4, 1e-5]
            self.step_op = tf.Variable(0, name='step', trainable=False)
            self.lr = tf.train.piecewise_constant(self.step_op, bounds, values)
        elif self.from_arch == 'conv_ladder' and self.to_arch == 'conv':
            self.gs_inputs = tf.image.rgb_to_grayscale(self.inputs)
            self.res_inputs = tf.image.resize_images(self.gs_inputs, (28, 28))

            net = {}
            net['0'] = self.res_inputs

            self.weights = {'beta': {k : self.bi(0.0, v[0], "beta", scope="transfer_weights") \
                                 for (k, v) in self.conv_params.items()}}
            # Hack: Hardcode last beta weight for fully connected + softmax at end.
            self.weights['beta'][8] = self.bi(0.0,
                                              10,
                                              "beta",
                                              scope="transfer_weights")

            for (l, layer_type) in enumerate(self.layers):
                if l == 0:
                    prev = '0'
                else:
                    prev = "{}{}".format(self.layers[l - 1], l - 1)
                curr = "{}{}".format(self.layers[l], l)
                if layer_type == 'conv':
                    net[curr] = conv_layer(net[prev], self.conv_params[l][0], \
                                       self.conv_params[l][1], \
                                       scope="conv{}".format(l), trainable=False)
                elif layer_type == 'maxpool':
                    net[curr] = pool_layer(net[prev], 'max')
                elif layer_type == 'avgpool':
                    net[curr] = pool_layer(net[prev], 'avg')
                elif layer_type == 'fc':
                    net[prev] = tf.contrib.layers.flatten(net[prev])
                    net[curr] = fc_layer(net[prev], self.fc_params[l], \
                            scope="fc{}".format(l), trainable=False)

                if layer_type == 'conv':
                    net[curr] = tf.nn.relu(net[curr] + self.weights['beta'][l])
            sess = tf.Session()
            saver = tf.train.Saver()
            fm = 'conv_checkpoints'
            ckpt = tf.train.get_checkpoint_state(fm)
            if ckpt and ckpt.model_checkpoint_path:
                checkpoint_path = ckpt.model_checkpoint_path
                saver.restore(sess, checkpoint_path)
                epoch_n = int(checkpoint_path.split('-')[1])
                eprint("Restored Epoch ", epoch_n)
            eprint("Restored weights from file {}".format(fm))

            from_net = net
            to_net = {}

            for (l, layer_type) in enumerate(self.layers):
                if l == 0:
                    prev = '0'
                else:
                    prev = '{}{}'.format(self.layers[l - 1], l - 1)
                curr = '{}{}'.format(self.layers[l], l)

                if layer_type == 'conv':
                    first_out = conv_layer(from_net[prev], self.conv_params[l][0], \
                                      self.conv_params[l][1], \
                                      scope="first_conv{}".format(l))
                    second_out = conv_layer(to_net[prev], self.conv_params[l][0], \
                                      self.conv_params[l][1], \
                                      scope="sec_conv{}".format(l))
                    to_net[curr] = first_out + second_out
                elif layer_type == 'maxpool':
                    to_net[curr] = pool_layer(to_net[prev], 'max')
                elif layer_type == 'avgpool':
                    to_net[curr] = pool_layer(to_net[prev], 'avg')
                elif layer_type == 'fc':
                    # Build the target branch consistently in to_net, under a fresh
                    # scope so the source network's fc variables are not reused.
                    to_net[prev] = tf.contrib.layers.flatten(to_net[prev])
                    to_net[curr] = fc_layer(to_net[prev],
                                            self.fc_params[l],
                                            scope="first_fc{}".format(l))

                if layer_type == 'conv':
                    to_net[curr] = tf.nn.relu(to_net[curr] + self.weights['beta'][l])

        elif self.from_arch == "None" and self.to_arch == 'conv':
            self.gs_inputs = tf.image.rgb_to_grayscale(self.inputs)
            self.res_inputs = tf.image.resize_images(self.gs_inputs, (28, 28))
            net = {}
            net['0'] = self.res_inputs

            self.weights = {'W_raw': [self.wi((10, 10), "W", scope="transfer_weights")], \
                        'beta': {k : self.bi(0.0, v[0], "beta", scope="transfer_weights") \
                                 for (k, v) in self.conv_params.items()},
                        'gamma': {8: self.bi(1.0, 10, "gamma")}}
            # Hack: Hardcode last beta weight for fully connected + softmax at end.
            self.weights['beta'][8] = self.bi(0.0,
                                              10,
                                              "beta",
                                              scope="transfer_weights")

            for (l, layer_type) in enumerate(self.layers):
                if l == 0:
                    prev = '0'
                else:
                    prev = "{}{}".format(self.layers[l - 1], l - 1)
                curr = "{}{}".format(self.layers[l], l)
                if layer_type == 'conv':
                    net[curr] = conv_layer(net[prev], self.conv_params[l][0], \
                                       self.conv_params[l][1], \
                                       scope="conv{}".format(l))
                elif layer_type == 'maxpool':
                    net[curr] = pool_layer(net[prev], 'max')
                elif layer_type == 'avgpool':
                    net[curr] = pool_layer(net[prev], 'avg')
                elif layer_type == 'fc':
                    net[prev] = tf.contrib.layers.flatten(net[prev])
                    net[curr] = fc_layer(net[prev],
                                         self.fc_params[l],
                                         scope="fc{}".format(l))

                if layer_type == 'conv':
                    net[curr] = tf.nn.relu(net[curr] + self.weights['beta'][l])
                elif layer_type == 'fc':
                    # Note: softmax is applied here, and the loss below also calls
                    # softmax_cross_entropy_with_logits on net['output'].
                    net[curr] = tf.nn.softmax(self.weights['gamma'][l] * (net[curr] +
                        self.weights['beta'][l]))
            net['output'] = net['fc{}'.format(self.L)]
            net['predicted'] = tf.cast(tf.argmax( \
                    net['output'], axis=-1), tf.int64)
            bounds = [15000]
            values = [1e-4, 1e-5]
            self.step_op = tf.Variable(0, name='step', trainable=False)
            self.lr = tf.train.piecewise_constant(self.step_op, bounds, values)

        eprint("{} architecture -> {} architecture".format(
            self.from_arch, self.to_arch))
        eprint(
            "Total number of variables used ",
            np.sum([
                v.get_shape().num_elements() for v in tf.trainable_variables()
            ]))
        eprint("Learning rate: {}".format(self.lr))

        reg_loss = tf.reduce_sum(
            tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
        self.loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=net['output'],
                                                    labels=self.labels))
        self.loss = self.loss + 1e-6 * reg_loss

        self.minimizer = tf.train.AdamOptimizer(self.lr).minimize(self.loss, \
                global_step=self.step_op)
        self.correct = tf.equal(net['predicted'], self.raw_labels)
        self.accuracy = tf.reduce_mean(tf.cast(self.correct, tf.float32))
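
A rough sketch of how the graph built above might be run, assuming the enclosing class exposes the attributes created here (inputs, raw_labels, the two noise-level placeholders, minimizer, loss, accuracy) and an L matching the placeholder lengths; TransferModel and next_batch below are hypothetical names, not part of the original code.

# Hypothetical training loop over create_compute_graph() (TF 1.x).
import numpy as np
import tensorflow as tf

model = TransferModel(from_arch='None', to_arch='fc')  # hypothetical constructor
model.create_compute_graph()

noise = np.zeros(model.L + 1, dtype=np.float32)  # zero noise; real levels are model-specific

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for step in range(1000):
        images, labels = next_batch()  # hypothetical data helper: (N, 32, 32, 3), (N,)
        feed = {model.inputs: images,
                model.raw_labels: labels,
                model.src_noise_levels: noise,
                model.target_noise_levels: noise}
        _, loss, acc = sess.run([model.minimizer, model.loss, model.accuracy],
                                feed_dict=feed)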