class AlexNetController:
    """Recurrent controller: AlexNet encoding -> small FC stack -> LSTM cell.

    Each call encodes an image with the wrapped AlexNet, compresses the
    flattened encoding through three fully connected layers, concatenates the
    result with the shifted label and the slices of ``vector_inp``, and feeds
    that to a ``BasicLSTMCell``.
    """

    def __init__(self, rnn_size, encoding_size, image_size=128, args=None):
        """Build the LSTM cell and pick the AlexNet implementation.

        Args:
            rnn_size: number of units passed to ``BasicLSTMCell``.
            encoding_size: width of the last fully connected layer.
            image_size: side length used when reshaping omniglot images.
            args: configuration object; must expose ``use_pretrained`` (read
                here) and ``dataset_type`` (read in ``__call__``).

        Raises:
            ValueError: if ``args`` is omitted. The ``None`` default is kept
                only for signature compatibility; every code path needs it.
        """
        # Fail fast with a readable message instead of an AttributeError on
        # NoneType further down (and before any TensorFlow object is built).
        if args is None:
            raise ValueError(
                "AlexNetController requires `args` exposing `use_pretrained` "
                "and `dataset_type`; got None")

        self.lstm = tf.contrib.rnn.BasicLSTMCell(rnn_size)
        self.args = args

        # Load in the Alex net
        self.use_pretrained = args.use_pretrained
        if self.use_pretrained:
            self.alexnet = AlexNet()
            self.alexnet.load_weights()
        else:
            self.alexnet = alexnet_OLD.AlexNet()

        self.encoding_size = encoding_size
        self.image_size = image_size

    def __call__(self, img_inp, shifted_label, vector_inp, state,
                 scope='AlexNetController'):
        """Run one controller step.

        Args:
            img_inp: image tensor; cast to float32. For the ``'omniglot'``
                dataset it is reshaped to ``[-1, image_size, image_size]`` and
                given a trailing channel axis.
            shifted_label: tensor concatenated (axis 1) with the FC encoding.
            vector_inp: tensor sliced as ``vector_inp[i, :, :]`` along its
                leading axis, so it must be rank 3 with a statically known
                first dimension; every slice is concatenated on axis 1.
            state: LSTM state forwarded unchanged to the cell.
            scope: variable scope wrapping the FC layers and the LSTM call.

        Returns:
            Whatever ``self.lstm(lstm_input, state)`` returns.
        """
        # Q: does the img_inp need to be of 224x224?
        # Have to ensure that the input is in img form.
        img_inp = tf.cast(img_inp, tf.float32)
        if self.args.dataset_type == 'omniglot':
            # Reshape to get the right input image size, then append a single
            # channel axis (the image is not tiled to three channels).
            img_inp = tf.reshape(
                img_inp, [-1, self.image_size, self.image_size])
            img_inp = tf.expand_dims(img_inp, axis=-1)
        vector_inp = tf.cast(vector_inp, tf.float32)

        net = self.alexnet.feed_forward(img_inp, architecture='encoding')
        net['flattened'] = tf.contrib.layers.flatten(net['output'])

        # NOTE(review): the FC stack and the LSTM call are both placed inside
        # this scope; confirm against existing checkpoint variable names.
        with tf.variable_scope(scope):
            # If get casting issue make sure that the architecture is right
            hidden = fc_layer(net['flattened'], 256)
            hidden = fc_layer(hidden, 64)
            fc_output = fc_layer(hidden, self.encoding_size)

            lstm_input = tf.concat([fc_output, shifted_label], axis=1)

            # Flatten vector_inp: one 2-D slice per entry of the leading axis.
            vector_slices = [
                vector_inp[i, :, :]
                for i in range(vector_inp.get_shape()[0])
            ]
            lstm_input = tf.concat([lstm_input] + vector_slices, axis=1)
            return self.lstm(lstm_input, state)

    def zero_state(self, batch_size, dtype):
        """Return the wrapped LSTM cell's zero state for ``batch_size``."""
        return self.lstm.zero_state(batch_size, dtype)
def create_compute_graph(self):
    """Build the transfer-learning graph selected by ``from_arch``/``to_arch``.

    Creates the input/label/noise placeholders, builds one of six network
    variants, and then attaches the loss, optimizer and accuracy ops.

    Always sets: ``inputs``, ``raw_labels``, ``labels``, ``src_noise_levels``,
    ``target_noise_levels``, ``loss``, ``minimizer``, ``correct``,
    ``accuracy``. Depending on the branch it also sets ``gs_inputs``,
    ``res_inputs``, ``weights``, ``step_op`` and ``lr``.

    Raises:
        ValueError: if ``(from_arch, to_arch)`` is not one of the supported
            pairs. Raised before any graph op is created.
    """
    source_arch, target_arch = self.from_arch, self.to_arch

    # Fail fast: previously an unknown pair fell through every branch and only
    # crashed later on an undefined ``net``.
    supported_transfers = (
        ('ladder', 'alex'),
        ('ladder', 'fc'),
        ('baseline', 'fc'),
        ('ladder', 'pre_fc'),
        ('baseline', 'pre_fc'),
        ('None', 'fc'),
        ('conv_ladder', 'conv'),
        ('None', 'conv'),
    )
    if (source_arch, target_arch) not in supported_transfers:
        raise ValueError(
            "Unsupported architecture transfer: from_arch={!r}, "
            "to_arch={!r}".format(source_arch, target_arch))

    self.inputs = tf.placeholder(tf.float32, shape=(None, 32, 32, 3))
    # ``(None)`` is plain ``None`` (not a 1-tuple): the label shape is left
    # unconstrained. Likewise ``(self.L + 1)`` below is a bare int.
    self.raw_labels = tf.placeholder(tf.int64, shape=(None))
    self.labels = tf.one_hot(self.raw_labels, self.to_num_classes)
    self.src_noise_levels = tf.placeholder(tf.float32, shape=(self.L + 1))
    self.target_noise_levels = tf.placeholder(tf.float32, shape=(self.L + 1))

    # Step boundary of the piecewise-constant learning rate; ``None`` means
    # the selected branch does not define a schedule itself.
    lr_boundaries = None
    lr_values = [1e-4, 1e-5]
    # Pre-softmax tensor for the loss when ``net['output']`` is already a
    # probability distribution; ``None`` means ``net['output']`` holds logits.
    loss_logits = None

    if source_arch == 'ladder' and target_arch == 'alex':
        # NOTE(review): this branch sets neither ``self.lr`` nor
        # ``self.step_op``; the code below reads both, so they must be
        # provided elsewhere. TODO confirm.
        self.load_ladder_weights()
        alexnet_class = AlexNet()
        alexnet = alexnet_class.feed_forward(self.inputs)
        with tf.variable_scope('progressive_net'):
            alexnet['pool_3'] = tf.contrib.layers.flatten(alexnet['pool_3'])
            # TODO(dbthaker): Change activation fn
            alexnet['fc_1'] = fc_layer(alexnet['pool_3'], 250)
            alexnet['fc_1'] = tf.layers.batch_normalization(alexnet['fc_1'])
            alexnet['fc_1'] = tf.layers.dropout(alexnet['fc_1'], 0.5)

            # Lateral column driven by the loaded ladder weights.
            ladder1 = tf.matmul(alexnet['pool_3'],
                                self.weights['W'][self.L - 3])
            ladder1 = tf.layers.batch_normalization(ladder1, training=False)
            ladder1 = tf.nn.relu(ladder1 + self.weights['beta'][self.L - 3])

            first_out = fc_layer(alexnet['fc_1'], 250)
            second_out = fc_layer(ladder1, 250)
            alexnet['fc_2'] = first_out + second_out
            alexnet['fc_2'] = tf.layers.batch_normalization(alexnet['fc_2'])
            alexnet['fc_2'] = tf.layers.dropout(alexnet['fc_2'], 0.5)

            ladder2 = tf.matmul(ladder1, self.weights['W'][self.L - 2])
            ladder2 = tf.layers.batch_normalization(ladder2, training=False)
            ladder2 = tf.nn.relu(ladder2 + self.weights['beta'][self.L - 2])

            first_out = fc_layer(alexnet['fc_2'], self.to_num_classes,
                                 activation_fn=None)
            second_out = fc_layer(ladder2, self.to_num_classes,
                                  activation_fn=None)
            alexnet['output'] = first_out + second_out
            alexnet['predicted'] = tf.cast(
                tf.argmax(tf.nn.softmax(alexnet['output']), axis=-1),
                tf.int64)
        net = alexnet

    elif source_arch in ('ladder', 'baseline') and target_arch == 'fc':
        self.gs_inputs = tf.image.rgb_to_grayscale(self.inputs)
        self.res_inputs = tf.image.resize_images(self.gs_inputs, (28, 28))
        self.load_ladder_weights(source_arch)

        # ``from_net`` replays the loaded weights; ``to_net`` is the new
        # column that also receives lateral input from ``from_net``.
        from_net = {}
        to_net = {}
        from_net['fc0'] = tf.contrib.layers.flatten(self.res_inputs)
        from_net['fc0'] = gaussian_noise_layer(from_net['fc0'],
                                               self.src_noise_levels[0])
        to_net['fc0'] = tf.contrib.layers.flatten(self.res_inputs)
        to_net['fc0'] = gaussian_noise_layer(to_net['fc0'],
                                             self.target_noise_levels[0])

        for depth in range(1, self.L + 2):
            prev = 'fc{}'.format(depth - 1)
            curr = 'fc{}'.format(depth)
            width = self.layer_sizes[depth - 1]

            # The source column is not extended for the final layer.
            if depth != self.L + 1:
                from_net[curr] = tf.matmul(from_net[prev],
                                           self.weights['W'][depth - 1])
                from_net[curr] = tf.layers.batch_normalization(
                    from_net[curr], training=False)
                from_net[curr] = tf.nn.relu(
                    from_net[curr] + self.weights['beta'][depth - 1])
                from_net[curr] = gaussian_noise_layer(
                    from_net[curr], self.src_noise_levels[depth - 1])

            first_out = fc_layer(to_net[prev], width,
                                 scope="first_fc{}".format(depth),
                                 activation_fn=tf.nn.relu)
            print("Ladder {} -> Ladder {}".format(prev, curr))

            if depth != 1:
                second_out = fc_layer(from_net[prev], width,
                                      scope="second_fc{}".format(depth),
                                      activation_fn=tf.nn.relu)
                to_net[curr] = first_out + second_out
                to_net[curr] = gaussian_noise_layer(
                    to_net[curr], self.target_noise_levels[depth - 1])
                print("Ladder {} -> New {} {} + New {} -> New {} {}".format(
                    prev, curr, width, prev, curr, width))
            else:
                # First layer: both columns see the same input, so there is
                # no lateral connection.
                to_net[curr] = first_out
                to_net[curr] = gaussian_noise_layer(
                    to_net[curr], self.target_noise_levels[depth - 1])
                print("New {} -> New {} {}".format(prev, curr, width))

        to_net['output'] = to_net['fc{}'.format(self.L + 1)]
        to_net['predicted'] = tf.cast(
            tf.argmax(tf.nn.softmax(to_net['output']), axis=-1), tf.int64)
        net = to_net
        # Same schedule for 'ladder' and 'baseline' sources.
        lr_boundaries = [70000]

    elif source_arch in ('ladder', 'baseline') and target_arch == 'pre_fc':
        self.load_ladder_weights(source_arch, trainable=True)
        net = self.fc_decoder()
        lr_boundaries = [70000]

    elif source_arch == "None" and target_arch == 'fc':
        net = self.fc_decoder()
        lr_boundaries = [70000]

    elif source_arch == 'conv_ladder' and target_arch == 'conv':
        # NOTE(review): this branch is unfinished and is kept as found.
        # Known problems, none of them fixed here because the intended
        # design is not recoverable from the code:
        #   * ``set_trace()`` debugger breakpoints are still in place;
        #   * ``to_net`` starts empty, yet ``to_net[prev]`` is read on the
        #     first iteration of the second loop;
        #   * the avgpool/fc/relu steps of the second loop write to ``net``
        #     (the same dict as ``from_net``) instead of ``to_net``;
        #   * the restoring ``tf.Session`` is local, never closed and never
        #     stored;
        #   * no ``output``/``predicted`` entries, ``self.lr`` or
        #     ``self.step_op`` are produced, all of which are read below.
        self.gs_inputs = tf.image.rgb_to_grayscale(self.inputs)
        self.res_inputs = tf.image.resize_images(self.gs_inputs, (28, 28))
        net = {}
        net['0'] = self.res_inputs
        self.weights = {
            'beta': {
                k: self.bi(0.0, v[0], "beta", scope="transfer_weights")
                for (k, v) in self.conv_params.items()
            }
        }
        # Hack: Hardcode last beta weight for fully connected + softmax at end.
        self.weights['beta'][8] = self.bi(0.0, 10, "beta",
                                          scope="transfer_weights")

        # Frozen (trainable=False) source network.
        for (idx, layer_type) in enumerate(self.layers):
            if idx == 0:
                prev = '0'
            else:
                prev = "{}{}".format(self.layers[idx - 1], idx - 1)
            curr = "{}{}".format(self.layers[idx], idx)

            if layer_type == 'conv':
                net[curr] = conv_layer(net[prev], self.conv_params[idx][0],
                                       self.conv_params[idx][1],
                                       scope="conv{}".format(idx),
                                       trainable=False)
            elif layer_type == 'maxpool':
                net[curr] = pool_layer(net[prev], 'max')
            elif layer_type == 'avgpool':
                net[curr] = pool_layer(net[prev], 'avg')
            elif layer_type == 'fc':
                set_trace()
                net[prev] = tf.contrib.layers.flatten(net[prev])
                net[curr] = fc_layer(net[prev], self.fc_params[idx],
                                     scope="fc{}".format(idx),
                                     trainable=False)

            if layer_type == 'conv':
                net[curr] = tf.nn.relu(net[curr] + self.weights['beta'][idx])

        sess = tf.Session()
        saver = tf.train.Saver()
        fm = 'conv_checkpoints'
        ckpt = tf.train.get_checkpoint_state(fm)
        if ckpt and ckpt.model_checkpoint_path:
            set_trace()
            checkpoint_path = ckpt.model_checkpoint_path
            saver.restore(sess, checkpoint_path)
            epoch_n = int(checkpoint_path.split('-')[1])
            eprint("Restored Epoch ", epoch_n)
            eprint("Restored weights from file {}".format(fm))

        from_net = net
        to_net = {}
        for (idx, layer_type) in enumerate(self.layers):
            if idx == 0:
                prev = '0'
            else:
                prev = '{}{}'.format(self.layers[idx - 1], idx - 1)
            curr = '{}{}'.format(self.layers[idx], idx)

            if layer_type == 'conv':
                first_out = conv_layer(from_net[prev],
                                       self.conv_params[idx][0],
                                       self.conv_params[idx][1],
                                       scope="first_conv{}".format(idx))
                second_out = conv_layer(to_net[prev],
                                        self.conv_params[idx][0],
                                        self.conv_params[idx][1],
                                        scope="sec_conv{}".format(idx))
                to_net[curr] = first_out + second_out
            elif layer_type == 'maxpool':
                to_net[curr] = pool_layer(to_net[prev], 'max')
            elif layer_type == 'avgpool':
                net[curr] = pool_layer(to_net[prev], 'avg')
            elif layer_type == 'fc':
                net[prev] = tf.contrib.layers.flatten(net[prev])
                net[curr] = fc_layer(net[prev], self.fc_params[idx],
                                     scope="fc{}".format(idx))

            if layer_type == 'conv':
                net[curr] = tf.nn.relu(net[curr] + self.weights['beta'][idx])

    elif source_arch == "None" and target_arch == 'conv':
        self.gs_inputs = tf.image.rgb_to_grayscale(self.inputs)
        self.res_inputs = tf.image.resize_images(self.gs_inputs, (28, 28))
        net = {}
        net['0'] = self.res_inputs
        self.weights = {
            'W_raw': [self.wi((10, 10), "W", scope="transfer_weights")],
            'beta': {
                k: self.bi(0.0, v[0], "beta", scope="transfer_weights")
                for (k, v) in self.conv_params.items()
            },
            'gamma': {8: self.bi(1.0, 10, "gamma")},
        }
        # Hack: Hardcode last beta weight for fully connected + softmax at end.
        self.weights['beta'][8] = self.bi(0.0, 10, "beta",
                                          scope="transfer_weights")

        pre_softmax = {}
        for (idx, layer_type) in enumerate(self.layers):
            if idx == 0:
                prev = '0'
            else:
                prev = "{}{}".format(self.layers[idx - 1], idx - 1)
            curr = "{}{}".format(self.layers[idx], idx)

            if layer_type == 'conv':
                net[curr] = conv_layer(net[prev], self.conv_params[idx][0],
                                       self.conv_params[idx][1],
                                       scope="conv{}".format(idx))
            elif layer_type == 'maxpool':
                net[curr] = pool_layer(net[prev], 'max')
            elif layer_type == 'avgpool':
                net[curr] = pool_layer(net[prev], 'avg')
            elif layer_type == 'fc':
                net[prev] = tf.contrib.layers.flatten(net[prev])
                net[curr] = fc_layer(net[prev], self.fc_params[idx],
                                     scope="fc{}".format(idx))

            if layer_type == 'conv':
                net[curr] = tf.nn.relu(net[curr] + self.weights['beta'][idx])
            elif layer_type == 'fc':
                # Keep the scaled, shifted activations: the loss needs them
                # before the softmax is applied.
                pre_softmax[curr] = self.weights['gamma'][idx] * (
                    net[curr] + self.weights['beta'][idx])
                net[curr] = tf.nn.softmax(pre_softmax[curr])

        output_key = 'fc{}'.format(self.L)
        net['output'] = net[output_key]
        net['predicted'] = tf.cast(
            tf.argmax(net['output'], axis=-1), tf.int64)
        # ``net['output']`` is already a softmax here, and
        # softmax_cross_entropy_with_logits applies its own softmax; feeding
        # it probabilities would normalise twice.
        loss_logits = pre_softmax[output_key]
        lr_boundaries = [15000]

    # Built after the network so the op creation order matches the branches
    # that used to construct the schedule inline.
    if lr_boundaries is not None:
        self.step_op = tf.Variable(0, name='step', trainable=False)
        self.lr = tf.train.piecewise_constant(self.step_op, lr_boundaries,
                                              lr_values)

    eprint("{} architecture -> {} architecture".format(
        self.from_arch, self.to_arch))
    eprint(
        "Total number of variables used ",
        np.sum([
            v.get_shape().num_elements() for v in tf.trainable_variables()
        ]))
    eprint("Learning rate: {}".format(self.lr))

    if loss_logits is None:
        loss_logits = net['output']

    reg_loss = tf.reduce_sum(
        tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
    data_loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=loss_logits,
                                                labels=self.labels))
    self.loss = data_loss + 1e-6 * reg_loss
    self.minimizer = tf.train.AdamOptimizer(self.lr).minimize(
        self.loss, global_step=self.step_op)
    self.correct = tf.equal(net['predicted'], self.raw_labels)
    self.accuracy = tf.reduce_mean(tf.cast(self.correct, tf.float32))