def batch_normalized_linear_layer(state_below, scope_name, n_inputs, n_outputs,
                                  stddev, wd, eps=.00001, test=False):
    """
    A linear layer with batch normalization
    """
    with tf.variable_scope(scope_name) as scope:
        weight = _variable_with_weight_decay(
            "weights", shape=[n_inputs, n_outputs], stddev=stddev, wd=wd
        )
        act = tf.matmul(state_below, weight)
        # get moments
        act_mean, act_variance = tf.nn.moments(act, [0])
        # get mean and variance variables
        mean = _variable_on_cpu('bn_mean', [n_outputs],
                                tf.constant_initializer(0.0), trainable=False)
        variance = _variable_on_cpu('bn_variance', [n_outputs],
                                    tf.constant_initializer(1.0), trainable=False)
        # assign the moments
        if not test:
            assign_mean = mean.assign(act_mean)
            assign_variance = variance.assign(act_variance)
            act_bn = tf.mul((act - act_mean), tf.rsqrt(act_variance + eps),
                            name=scope.name + "_bn")
        else:
            act_bn = tf.mul((act - mean), tf.rsqrt(variance + eps),
                            name=scope.name + "_bn")
        beta = _variable_on_cpu("beta", [n_outputs], tf.constant_initializer(0.0))
        gamma = _variable_on_cpu("gamma", [n_outputs], tf.constant_initializer(1.0))
        bn = tf.add(tf.mul(act_bn, gamma), beta)
        # output = tf.nn.relu(bn, name=scope.name)
        output = randomized_relu(bn, .1, name=scope.name, is_training=(not test))
        if not test:
            output = control_flow_ops.with_dependencies(
                dependencies=[assign_mean, assign_variance], output_tensor=output)
        _activation_summary(output)
        return output

def dense_layer(feed, input_dim, output_dim, dropout=False, keep_prob=None,
                batch_norm=False, weight_decay=None):
    weights = _variable_with_weight_decay('weights', shape=[input_dim, output_dim],
                                          stddev=0.04, wd=weight_decay)
    biases = _variable_on_cpu('biases', [output_dim], tf.constant_initializer(0.1))
    intermediate = tf.matmul(feed, weights)
    if batch_norm:
        mean, variance = tf.nn.moments(intermediate, axes=[0])
        epsilon = 1e-5
        gamma = _variable_on_cpu('gammas', [output_dim], tf.constant_initializer(1.0))
        pre_activation = tf.nn.batch_normalization(intermediate, mean, variance,
                                                   biases, gamma, epsilon)
    else:
        pre_activation = intermediate + biases
    if dropout:
        pre_activation = tf.nn.dropout(pre_activation, keep_prob=keep_prob,
                                       name="dropout")
    after_activation = tf.nn.relu(pre_activation, name='activated_out')
    _activation_summary(after_activation)
    return after_activation

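# Hedged usage sketch (not part of the original source): stacking dense_layer into a
# small MLP head under TF1 graph mode. It assumes the helpers used above
# (_variable_with_weight_decay, _variable_on_cpu, _activation_summary) live in the same
# module; the scope names, layer sizes, and placeholder shapes are illustrative only.
def mlp_head_example(flat_features, keep_prob):
    # each call is wrapped in its own variable scope so the 'weights'/'biases'
    # variables created inside dense_layer do not collide
    with tf.variable_scope('fc1'):
        fc1 = dense_layer(flat_features, input_dim=2048, output_dim=384,
                          dropout=True, keep_prob=keep_prob, weight_decay=0.004)
    with tf.variable_scope('fc2'):
        fc2 = dense_layer(fc1, input_dim=384, output_dim=192,
                          batch_norm=True, weight_decay=0.004)
    return fc2
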
def batch_normalized_conv_layer(state_below, scope_name, n_inputs, n_outputs,
                                filter_shape, stddev, wd, eps=.00001, test=False):
    """
    Convolutional layer with batch normalization
    """
    with tf.variable_scope(scope_name) as scope:
        kernel = _variable_with_weight_decay(
            "weights", shape=[filter_shape[0], filter_shape[1], n_inputs, n_outputs],
            stddev=stddev, wd=wd
        )
        conv = tf.nn.conv2d(state_below, kernel, [1, 1, 1, 1], padding='SAME')
        # get moments
        conv_mean, conv_variance = tf.nn.moments(conv, [0, 1, 2])
        # get mean and variance variables
        mean = _variable_on_cpu("bn_mean", [n_outputs],
                                tf.constant_initializer(0.0), False)
        variance = _variable_on_cpu("bn_variance", [n_outputs],
                                    tf.constant_initializer(1.0), False)
        # assign the moments
        if not test:
            assign_mean = mean.assign(conv_mean)
            assign_variance = variance.assign(conv_variance)
            conv_bn = tf.mul((conv - conv_mean), tf.rsqrt(conv_variance + eps),
                             name=scope.name + "_bn")
        else:
            conv_bn = tf.mul((conv - mean), tf.rsqrt(variance + eps),
                             name=scope.name + "_bn")
        beta = _variable_on_cpu("beta", [n_outputs], tf.constant_initializer(0.0))
        gamma = _variable_on_cpu("gamma", [n_outputs], tf.constant_initializer(1.0))
        bn = tf.add(tf.mul(conv_bn, gamma), beta)
        # output = tf.nn.relu(bn, name=scope.name)
        output = randomized_relu(bn, .1, name=scope.name, is_training=(not test))
        if not test:
            output = control_flow_ops.with_dependencies(
                dependencies=[assign_mean, assign_variance], output_tensor=output)
        _activation_summary(output)
        return output

def conv2d_stack(feed, kernel_list, stride_list, padding_list, batch_norm=False):
    if not ((len(kernel_list) == len(stride_list)) and
            (len(stride_list) == len(padding_list))):
        return
    inputs = []
    inputs.append(feed)
    for i in range(len(kernel_list)):
        with tf.variable_scope('conv%d' % (i + 1)) as scope:
            kernel = _variable_with_weight_decay('weights', shape=kernel_list[i],
                                                 stddev=5e-2, wd=None)
            conv = conv2d(inputs[-1], kernel, stride_list[i], padding=padding_list[i])
            biases = _variable_on_cpu('biases', kernel_list[i][-1],
                                      tf.constant_initializer(0.0))
            if batch_norm:
                mean, variance = tf.nn.moments(conv, axes=[0])
                epsilon = 1e-5
                gamma = _variable_on_cpu('gammas', kernel_list[i][-1],
                                         tf.constant_initializer(1.0))
                pre_activation = tf.nn.batch_normalization(
                    conv, mean, variance, biases, gamma, epsilon)
            else:
                pre_activation = tf.nn.bias_add(conv, biases)
            after_activation = tf.nn.relu(pre_activation, name='activated_out')
            _activation_summary(after_activation)
            inputs.append(after_activation)
    return inputs[-1]

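# Hedged usage sketch (not part of the original source): conv2d_stack expects the three
# lists to have the same length, one entry per conv layer, with kernel shapes given as
# [k_h, k_w, in_channels, out_channels]. The stride entries are passed straight through
# to the repo's own conv2d helper, so full [1, s, s, 1] vectors are assumed here; all
# sizes below are illustrative only.
def three_conv_tower_example(images):  # images: [batch, h, w, 3]
    return conv2d_stack(
        images,
        kernel_list=[[3, 3, 3, 32], [3, 3, 32, 64], [3, 3, 64, 64]],
        stride_list=[[1, 1, 1, 1], [1, 1, 1, 1], [1, 1, 1, 1]],
        padding_list=['SAME', 'SAME', 'SAME'],
        batch_norm=False)
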
def batch_norm_for_conv(x, phase_train, scope='bn'):
    channels = x.shape.as_list()[3]
    with tf.variable_scope(scope):
        gamma = _variable_on_cpu('gamma', [channels],
                                 tf.constant_initializer(1.0), dtype='float32')
        beta = _variable_on_cpu('beta', [channels],
                                tf.constant_initializer(0.0), dtype='float32')
        moving_mean = _variable_on_cpu('moving_mean', [channels], dtype='float32',
                                       initializer=tf.zeros_initializer(),
                                       trainable=False)
        moving_variance = _variable_on_cpu('moving_variance', [channels],
                                           dtype='float32',
                                           initializer=tf.zeros_initializer(),
                                           trainable=False)
        tf.add_to_collection('params', gamma)
        tf.add_to_collection('params', beta)
        tf.add_to_collection('params', moving_mean)
        tf.add_to_collection('params', moving_variance)
        if not phase_train:
            normed_x, _, _ = tf.nn.fused_batch_norm(x, gamma, beta,
                                                    mean=moving_mean,
                                                    variance=moving_variance,
                                                    is_training=False,
                                                    epsilon=cfg.bn_eps)
        else:
            normed_x, batch_mean, batch_var = tf.nn.fused_batch_norm(
                x, gamma, beta, is_training=True, epsilon=cfg.bn_eps)
            update_moving_mean = moving_averages.assign_moving_average(
                moving_mean, batch_mean, cfg.bn_momentum)
            update_moving_variance = moving_averages.assign_moving_average(
                moving_variance, batch_var, cfg.bn_momentum)
            tf.add_to_collection(tf.GraphKeys.UPDATE_OPS, update_moving_mean)
            tf.add_to_collection(tf.GraphKeys.UPDATE_OPS, update_moving_variance)
    return normed_x, [x, moving_mean, moving_variance, beta, gamma]

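# Hedged usage sketch (not part of the original source): batch_norm_for_conv only
# registers the moving-average updates in tf.GraphKeys.UPDATE_OPS, so a training step
# has to run those ops explicitly, e.g. via a control dependency on the optimizer step.
# The loss tensor and the choice of optimizer below are placeholders for whatever the
# surrounding project actually uses.
def make_train_op_example(loss, learning_rate=0.01):
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        train_op = tf.train.MomentumOptimizer(learning_rate, 0.9).minimize(loss)
    return train_op
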
def linear_layer(state_below, scope_name, n_inputs, n_outputs, stddev, wd,
                 use_nonlinearity=True):
    """
    Standard linear neural network layer
    """
    with tf.variable_scope(scope_name) as scope:
        weights = _variable_with_weight_decay('weights', [n_inputs, n_outputs],
                                              stddev=stddev, wd=wd)
        biases = _variable_on_cpu('biases', [n_outputs], tf.constant_initializer(0.0))
        activation = tf.nn.xw_plus_b(state_below, weights, biases, name="activation")
        if use_nonlinearity:
            output = tf.nn.relu(activation, name=scope.name)
        else:
            output = activation
        _activation_summary(output)
        return output

def linear_layer(state_below, scope_name, n_inputs, n_outputs, stddev, wd):
    """
    Standard linear neural network layer
    """
    with tf.variable_scope(scope_name) as scope:
        weights = _variable_with_weight_decay(
            'weights', [n_inputs, n_outputs], stddev=stddev, wd=wd
        )
        biases = _variable_on_cpu(
            'biases', [n_outputs], tf.constant_initializer(0.0)
        )
        output = tf.nn.xw_plus_b(state_below, weights, biases, name=scope.name)
        _activation_summary(output)
        return output

def conv_layer(state_below, scope_name, n_inputs, n_outputs, filter_shape, stddev, wd):
    """
    A standard convolutional layer
    """
    with tf.variable_scope(scope_name) as scope:
        kernel = _variable_with_weight_decay(
            "weights",
            shape=[filter_shape[0], filter_shape[1], n_inputs, n_outputs],
            stddev=stddev, wd=wd
        )
        conv = tf.nn.conv2d(state_below, kernel, [1, 1, 1, 1], padding='SAME')
        biases = _variable_on_cpu("biases", [n_outputs], tf.constant_initializer(0.0))
        bias = tf.add(conv, biases)
        output = tf.nn.relu(bias, name=scope.name)
        _activation_summary(output)
        return output

def __init__(self, input_dim=None, output_dim=1, init_path=None, opt_algo='gd',
             learning_rate=1e-2, l2_weight=0, sync=False, workers=20):
    Model.__init__(self)
    # self.graph = tf.Graph()
    # with self.graph.as_default():
    with tf.device('/cpu:0'):
        self.X = tf.sparse_placeholder(dtype)
        self.y = tf.placeholder(dtype)
        init_vars = [('w', [input_dim, output_dim], 'xavier', dtype),
                     ('b', [output_dim], 'zero', dtype)]
        self.vars = utils.init_var_map(init_vars, init_path)
        w = self.vars['w']
        b = self.vars['b']
        xw = tf.sparse_tensor_dense_matmul(self.X, w)
        logits = tf.reshape(xw + b, [-1])
        self.y_prob = tf.sigmoid(logits)
        self.loss = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(labels=self.y, logits=logits)) + \
            l2_weight * tf.nn.l2_loss(xw)
        self.global_step = _variable_on_cpu(
            'global_step', [],
            initializer=tf.constant_initializer(0), trainable=False)
        if sync:
            self.optimizer = utils.get_sync_optimizer(opt_algo, learning_rate, workers)
        else:
            self.optimizer = utils.get_optimizer(opt_algo, learning_rate)
        self.train_op = self.optimizer.minimize(self.loss,
                                                global_step=self.global_step)

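# Hedged usage sketch (not part of the original source): self.X above is a
# tf.sparse_placeholder, so at run time it is fed with a tf.SparseTensorValue
# (indices, values, dense_shape). The session, Model class and utils module are assumed
# to come from the surrounding project; the toy batch below is illustrative only.
def feed_sparse_batch_example(sess, model):
    sparse_x = tf.SparseTensorValue(
        indices=[[0, 3], [0, 17], [1, 5]],   # (row, feature_id) pairs
        values=[1.0, 1.0, 1.0],
        dense_shape=[2, 100000])             # batch_size x input_dim
    labels = [1.0, 0.0]
    _, loss = sess.run([model.train_op, model.loss],
                       feed_dict={model.X: sparse_x, model.y: labels})
    return loss
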
def conv_layer_with_bn(inputT, shape, train_phase, activation=True, name=None):
    in_channel = shape[2]
    out_channel = shape[3]
    k_size = shape[0]
    with tf.variable_scope(name) as scope:
        kernel = _variable_with_weight_decay(
            'ort_weights', shape=shape, initializer=orthogonal_initializer(), wd=None)
        conv = tf.nn.conv2d(inputT, kernel, [1, 1, 1, 1], padding='SAME')
        biases = _variable_on_cpu('biases', [out_channel],
                                  tf.constant_initializer(0.0))
        bias = tf.nn.bias_add(conv, biases)
        if activation is True:
            conv_out = tf.nn.relu(batch_norm_layer(bias, train_phase, scope.name))
        else:
            conv_out = batch_norm_layer(bias, train_phase, scope.name)
    return conv_out

def setup_graph(self, images, phase_train):
    # previously inference(labels, images, batch_size); in order to get batch_size at
    # run time rather than fixing it in the graph setup, read it from the input tensor:
    batchsize = tf.shape(images)[0]  # yike !!!
    print('GGG')
    print(images.get_shape())
    # norm1
    norm1 = tf.nn.lrn(images, depth_radius=5, bias=1.0, alpha=0.0001, beta=0.75,
                      name='norm1')
    print(norm1.get_shape())
    # conv1
    conv1 = conv_layer_with_bn(
        norm1, [7, 7, images.get_shape().as_list()[3], 64], phase_train,
        name="conv1")  # yike: 7 too large? how about 3?
    print(conv1.get_shape())
    # pool1
    pool1, pool1_indices = tf.nn.max_pool_with_argmax(conv1, ksize=[1, 2, 2, 1],
                                                      strides=[1, 2, 2, 1],
                                                      padding='SAME', name='pool1')
    print('111111')
    print(pool1.get_shape())
    print(pool1_indices.get_shape())
    # conv2
    conv2 = conv_layer_with_bn(pool1, [7, 7, 64, 64], phase_train, name="conv2")
    # pool2
    pool2, pool2_indices = tf.nn.max_pool_with_argmax(conv2, ksize=[1, 2, 2, 1],
                                                      strides=[1, 2, 2, 1],
                                                      padding='SAME', name='pool2')
    print('22222')
    print(pool2.get_shape())
    print(pool2_indices.get_shape())
    # conv3
    conv3 = conv_layer_with_bn(pool2, [7, 7, 64, 64], phase_train, name="conv3")
    # pool3
    pool3, pool3_indices = tf.nn.max_pool_with_argmax(conv3, ksize=[1, 2, 2, 1],
                                                      strides=[1, 2, 2, 1],
                                                      padding='SAME', name='pool3')
    print('33333')
    print(pool3.get_shape())
    print(pool3_indices.get_shape())
    # conv4
    conv4 = conv_layer_with_bn(pool3, [7, 7, 64, 64], phase_train, name="conv4")
    # pool4
    pool4, pool4_indices = tf.nn.max_pool_with_argmax(conv4, ksize=[1, 2, 2, 1],
                                                      strides=[1, 2, 2, 1],
                                                      padding='SAME', name='pool4')
    print('44444')
    print(pool4.get_shape())
    print(pool4_indices.get_shape())
    """ End of encoder """
    """ start upsample """
    # upsample4
    # Need to change out_w, out_h when using a different dataset
    # upsample4 = upsample_with_pool_indices(pool4, pool4_indices, pool4.get_shape(),
    #                                        out_w=45, out_h=60, scale=2, name='upsample4')
    pool3_shape = pool3.get_shape()
    upsample4 = deconv_layer(
        pool4, [2, 2, 64, 64],
        tf.stack([batchsize, pool3_shape[1], pool3_shape[2], 64]), 2, "up4")  # 45, 60
    # concat 4 yike
    # combined4 = tf.concat(axis=3, values=(upsample4, pool3))
    combined4 = tf.concat(axis=3, values=(upsample4, conv4))
    # print(tf.stack([batchsize, 45, 60, 64]))
    # decode 4
    conv_decode4 = conv_layer_with_bn(combined4, [7, 7, 128, 64], phase_train, False,
                                      name="conv_decode4")
    print('d4444444')
    print(conv_decode4.get_shape())
    # upsample 3
    # upsample3 = upsample_with_pool_indices(conv_decode4, pool3_indices,
    #                                        conv_decode4.get_shape(), scale=2, name='upsample3')
    pool2_shape = pool2.get_shape()
    upsample3 = deconv_layer(
        conv_decode4, [2, 2, 64, 64],
        tf.stack([batchsize, pool2_shape[1], pool2_shape[2], 64]), 2, "up3")  # 90, 120
    # concat 3 yike
    # combined3 = tf.concat(axis=3, values=(upsample3, pool2))
    combined3 = tf.concat(axis=3, values=(upsample3, conv3))
    # decode 3
    conv_decode3 = conv_layer_with_bn(combined3, [7, 7, 128, 64], phase_train, False,
                                      name="conv_decode3")
    print('d333333')
    print(conv_decode3.get_shape())
    # upsample2
    # upsample2 = upsample_with_pool_indices(conv_decode3, pool2_indices,
    #                                        conv_decode3.get_shape(), scale=2, name='upsample2')
    pool1_shape = pool1.get_shape()
    upsample2 = deconv_layer(
        conv_decode3, [2, 2, 64, 64],
        tf.stack([batchsize, pool1_shape[1], pool1_shape[2], 64]), 2, "up2")  # 180, 240
    # concat 2 yike
    # combined2 = tf.concat(axis=3, values=(upsample2, pool1))
    combined2 = tf.concat(axis=3, values=(upsample2, conv2))
    # decode 2
    conv_decode2 = conv_layer_with_bn(combined2, [7, 7, 128, 64], phase_train, False,
                                      name="conv_decode2")
    print('d22222')
    print(conv_decode2.get_shape())
    # upsample1
    # upsample1 = upsample_with_pool_indices(conv_decode2, pool1_indices,
    #                                        conv_decode2.get_shape(), scale=2, name='upsample1')
    upsample1 = deconv_layer(
        conv_decode2, [2, 2, 64, 64],
        tf.stack([batchsize, self.args.image_h, self.args.image_w, 64]), 2, "up1")
    # IMAGE_HEIGHT, IMAGE_WIDTH yike !!!!
    # deconv_layer(conv_decode2, [2, 2, 64, 64], [batch_size, 360, 480, 64], 2, "up1")
    # concat 1 yike
    # combined2 = tf.concat(axis=3, values=(upsample2, pool1))
    combined1 = tf.concat(axis=3, values=(upsample1, conv1))
    # decode 1
    conv_decode1 = conv_layer_with_bn(combined1, [7, 7, 128, 64], phase_train, False,
                                      name="conv_decode1")
    print('d111111')
    print(conv_decode1.get_shape())
    """ end of Decode """
    """ Start Classify """
    # output predicted class number (6)
    with tf.variable_scope('conv_classifier') as scope:
        kernel = _variable_with_weight_decay(
            'weights', shape=[1, 1, 64, self.num_classes],
            initializer=msra_initializer(1, 64), wd=0.0005)
        conv = tf.nn.conv2d(conv_decode1, kernel, [1, 1, 1, 1], padding='SAME')
        print('cv')
        print(conv.get_shape())
        biases = _variable_on_cpu('biases', [self.num_classes],
                                  tf.constant_initializer(0.0))
        print(biases.get_shape())
        logit = tf.nn.bias_add(conv, biases, name=scope.name)
        # conv_classifier = tf.nn.bias_add(conv, biases, name=scope.name)
        # print(conv_classifier.get_shape())
        # logit = conv_classifier
        # print('LLL')
        # print(labels)
        # print(conv_classifier)
        # loss = cal_loss(conv_classifier, labels)
        print(logit.get_shape())
    return logit  # loss

def __init__(self, data_dir=None, summary_dir=None, eval_dir=None, batch_size=None,
             input_dim=None, output_dim=1, layer_sizes=None, layer_acts=None,
             drop_out=None, layer_l2=None, kernel_l2=None, l2_w=0, init_path=None,
             opt_algo='gd', learning_rate=1e-2, sync=False, workers=20):
    Model.__init__(self)
    eprint("------- create graph ---------------")
    init_vars = []
    num_inputs = len(layer_sizes[0])
    factor_order = layer_sizes[1]
    for i in range(num_inputs):
        layer_input = layer_sizes[0][i]
        layer_output = factor_order
        init_vars.append(('w0_%d' % i, [layer_input, layer_output], 'tnormal', dtype))
        init_vars.append(('b0_%d' % i, [layer_output], 'zero', dtype))
    init_vars.append(('w1', [num_inputs * factor_order, layer_sizes[2]], 'tnormal',
                      dtype))
    init_vars.append(('k1', [num_inputs, layer_sizes[2]], 'tnormal', dtype))
    init_vars.append(('b1', [layer_sizes[2]], 'zero', dtype))
    for i in range(2, len(layer_sizes) - 1):
        layer_input = layer_sizes[i]
        layer_output = layer_sizes[i + 1]
        init_vars.append(('w%d' % i, [layer_input, layer_output], 'tnormal', dtype))
        init_vars.append(('b%d' % i, [layer_output], 'zero', dtype))
    with tf.name_scope('input_%d' % FLAGS.task_index) as scope:
        self.X = [tf.sparse_placeholder(dtype) for i in range(num_inputs)]
        self.B = tf.sparse_placeholder(tf.float32, name='B')
        self.y = tf.placeholder(dtype)
        self.keep_prob_train = 1 - np.array(drop_out)
        self.keep_prob_test = np.ones_like(drop_out)
        self.layer_keeps = tf.placeholder(dtype)
        self.vars = utils.init_var_map(init_vars, init_path)
        w0 = [self.vars['w0_%d' % i] for i in range(num_inputs)]
        b0 = [self.vars['b0_%d' % i] for i in range(num_inputs)]
        xw = [
            tf.sparse_tensor_dense_matmul(self.X[i], w0[i])
            for i in range(num_inputs)
        ]
        x = tf.concat([xw[i] + b0[i] for i in range(num_inputs)], 1)
        l = tf.nn.dropout(utils.activate(x, layer_acts[0]), self.layer_keeps[0])
        w1 = self.vars['w1']
        k1 = self.vars['k1']
        b1 = self.vars['b1']
        p = tf.reduce_sum(
            tf.reshape(
                tf.matmul(
                    tf.reshape(
                        tf.transpose(
                            tf.reshape(l, [-1, num_inputs, factor_order]),
                            [0, 2, 1]),
                        [-1, num_inputs]),
                    k1),
                [-1, factor_order, layer_sizes[2]]),
            1)
        l = tf.nn.dropout(
            utils.activate(tf.matmul(l, w1) + b1 + p, layer_acts[1]),
            self.layer_keeps[1])
        for i in range(2, len(layer_sizes) - 1):
            wi = self.vars['w%d' % i]
            bi = self.vars['b%d' % i]
            l = tf.nn.dropout(
                utils.activate(tf.matmul(l, wi) + bi, layer_acts[i]),
                self.layer_keeps[i])
        ## logits
        l = tf.reshape(l, [-1])
        self.y_prob = tf.sigmoid(l)
        self.loss = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(logits=l, labels=self.y))
        if layer_l2 is not None:
            self.loss += layer_l2[0] * tf.nn.l2_loss(tf.concat(xw, 1))
            for i in range(1, len(layer_sizes) - 1):
                wi = self.vars['w%d' % i]
                self.loss += layer_l2[i] * tf.nn.l2_loss(wi)
        if kernel_l2 is not None:
            self.loss += kernel_l2 * tf.nn.l2_loss(k1)
        self.global_step = _variable_on_cpu(
            'global_step', [],
            initializer=tf.constant_initializer(0), trainable=False)
        if sync:
            self.optimizer = utils.get_sync_optimizer(opt_algo, learning_rate, workers)
        else:
            self.optimizer = utils.get_optimizer(opt_algo, learning_rate)
        self.train_op = self.optimizer.minimize(self.loss,
                                                global_step=self.global_step)
        self.summary_op = tf.summary.merge_all()

def __init__(self, data_dir=None, summary_dir=None, eval_dir=None, batch_size=None,
             input_dim=None, output_dim=1, layer_sizes=None, layer_acts=None,
             drop_out=None, init_path=None, opt_algo='gd', learning_rate=1e-2,
             l2_w=0, layer_l2=None, sync=False, workers=20):
    Model.__init__(self)
    eprint("-------- create graph ----------")
    init_vars = []
    # linear part
    init_vars.append(('linear', [input_dim, output_dim], 'xavier', dtype))
    init_vars.append(('bias', [output_dim], 'zero', dtype))
    num_inputs = len(layer_sizes[0])
    factor_order = layer_sizes[1]
    for i in range(num_inputs):
        layer_input = layer_sizes[0][i]
        layer_output = factor_order
        # field_sizes[i] stores the i-th field feature number
        init_vars.append(('w0_%d' % i, [layer_input, layer_output], 'xavier', dtype))
        init_vars.append(('b0_%d' % i, [layer_output], 'zero', dtype))
    # full connection
    node_in = num_inputs * factor_order
    init_vars.append(('w1', [node_in, layer_sizes[2]], 'xavier', dtype))
    init_vars.append(('b1', [layer_sizes[2]], 'zero', dtype))
    for i in range(2, len(layer_sizes) - 1):
        layer_input = layer_sizes[i]
        layer_output = layer_sizes[i + 1]
        init_vars.append(('w%d' % i, [layer_input, layer_output], 'xavier', dtype))
        init_vars.append(('b%d' % i, [layer_output], 'zero', dtype))
    # self.graph = tf.Graph()
    # with self.graph.as_default():
    # with tf.device('/cpu:0'):
    with tf.name_scope('input_%d' % FLAGS.task_index) as scope:
        self.X = [tf.sparse_placeholder(dtype) for i in range(num_inputs)]
        self.B = tf.sparse_placeholder(tf.float32, name='B')
        self.y = tf.placeholder(dtype)
        self.keep_prob_train = 1 - np.array(drop_out)
        self.keep_prob_test = np.ones_like(drop_out)
        self.layer_keeps = tf.placeholder(dtype)
        self.vars = utils.init_var_map(init_vars, init_path)
        w0 = [self.vars['w0_%d' % i] for i in range(num_inputs)]
        b0 = [self.vars['b0_%d' % i] for i in range(num_inputs)]
        xw = [
            tf.sparse_tensor_dense_matmul(self.X[i], w0[i])
            for i in range(num_inputs)
        ]
        x = tf.concat([xw[i] + b0[i] for i in range(num_inputs)], 1)
        ## normalize
        fmX = tf.sparse_add(self.X[0], self.X[1])
        for i in range(2, num_inputs):
            fmX = tf.sparse_add(fmX, self.X[i])
        Xnorm = tf.reshape(1.0 / tf.sparse_reduce_sum(fmX, 1), [-1, output_dim])
        l = tf.nn.dropout(utils.activate(x, layer_acts[0]), self.layer_keeps[0])
        for i in range(1, len(layer_sizes) - 1):
            wi = self.vars['w%d' % i]
            bi = self.vars['b%d' % i]
            eprint(l.get_shape(), wi.get_shape(), bi.get_shape())
            l = tf.nn.dropout(
                utils.activate(tf.matmul(l, wi) + bi, layer_acts[i]),
                self.layer_keeps[i])
        ## FM linear part
        fmb = self.vars['bias']
        fmw = self.vars['linear']
        Xw = tf.sparse_tensor_dense_matmul(self.B, fmw)
        ## cross term
        # XV, shape: input_dim*k
        fmXV = tf.add_n(xw)
        XV_square = tf.square(fmXV)
        eprint(XV_square.get_shape())
        # X^2 * V^2, shape: input_dim*k
        fmX2 = [
            tf.SparseTensor(self.X[i].indices, tf.square(self.X[i].values),
                            tf.to_int64(tf.shape(self.X[i])))
            for i in range(num_inputs)
        ]
        fmV2 = [tf.square(w0[i]) for i in range(num_inputs)]
        fmX2V2 = [
            tf.sparse_tensor_dense_matmul(fmX2[i], fmV2[i])
            for i in range(num_inputs)
        ]
        X2V2 = tf.add_n(fmX2V2)
        eprint(X2V2.get_shape())
        # 1/2 * row_sum(XV_square - X2V2), shape: input_dim*1
        p = 0.5 * Xnorm * tf.reshape(tf.reduce_sum(XV_square - X2V2, 1),
                                     [-1, output_dim])
        ## logits
        logits = tf.reshape(l + Xw + fmb + p, [-1])
        ## predict
        self.y_prob = tf.sigmoid(logits)
        self.loss = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=self.y)) + \
            l2_w * tf.nn.l2_loss(Xw)
        if layer_l2 is not None:
            self.loss += layer_l2[0] * tf.nn.l2_loss(tf.concat(xw, 1))
            for i in range(1, len(layer_sizes) - 1):
                wi = self.vars['w%d' % i]
                self.loss += layer_l2[i] * tf.nn.l2_loss(wi)
        self.global_step = _variable_on_cpu(
            'global_step', [],
            initializer=tf.constant_initializer(0), trainable=False)
        if sync:
            self.optimizer = utils.get_sync_optimizer(opt_algo, learning_rate, workers)
        else:
            self.optimizer = utils.get_optimizer(opt_algo, learning_rate)
        self.train_op = self.optimizer.minimize(self.loss,
                                                global_step=self.global_step)
        self.summary_op = tf.summary.merge_all()

def __init__(self, data_dir=None, summary_dir=None, eval_dir=None, batch_size=None,
             input_dim=None, output_dim=1, factor_order=10, init_path=None,
             opt_algo='gd', learning_rate=1e-2, l2_w=0, sync=False, workers=20):
    Model.__init__(self)
    eprint("-------- create graph ----------")
    with tf.name_scope('input_%d' % FLAGS.task_index) as scope:
        self.X = tf.sparse_placeholder(tf.float32, name='X')
        self.B = tf.sparse_placeholder(tf.float32, name='B')
        self.y = tf.placeholder(tf.float32, shape=[None], name='y')
        init_vars = [('linear', [input_dim, output_dim], 'xavier', dtype),
                     ('U', [input_dim, factor_order], 'xavier', dtype),
                     ('V', [input_dim, factor_order], 'xavier', dtype),
                     ('bias', [output_dim], 'zero', dtype)]
        self.vars = utils.init_var_map(init_vars, None)
        w = self.vars['linear']
        U = self.vars['U']
        V = self.vars['V']
        b = self.vars['bias']
        ## normalize
        Xnorm = tf.reshape(1.0 / tf.sparse_reduce_sum(self.X, 1), [-1, output_dim])
        ## linear term
        Xw = tf.sparse_tensor_dense_matmul(self.B, w, name="Xw")
        ## cross term
        XU = tf.sparse_tensor_dense_matmul(self.X, U, name="XU")
        XV = tf.sparse_tensor_dense_matmul(self.X, V, name="XV")
        X_square = tf.SparseTensor(self.X.indices, tf.square(self.X.values),
                                   tf.to_int64(tf.shape(self.X)))
        p = 0.5 * Xnorm * tf.reshape(
            tf.reduce_sum(
                XU * XV - tf.sparse_tensor_dense_matmul(X_square, U * V), 1),
            [-1, output_dim])
        logits = tf.reshape(b + Xw + p, [-1])
        self.y_prob = tf.sigmoid(logits)
        self.loss = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=self.y)) + \
            l2_w * tf.nn.l2_loss(Xw)
        self.global_step = _variable_on_cpu(
            'global_step', [],
            initializer=tf.constant_initializer(0), trainable=False)
        if sync:
            self.optimizer = utils.get_sync_optimizer(opt_algo, learning_rate, workers)
        else:
            self.optimizer = utils.get_optimizer(opt_algo, learning_rate)
        self.train_op = self.optimizer.minimize(self.loss,
                                                global_step=self.global_step)
        self.summary_op = tf.summary.merge_all()

def __init__(self, data_dir=None, eval_dir=None, summary_dir=None, num_epochs=1,
             batch_size=None, input_dim=None, output_dim=1, factor_order=10,
             init_path=None, opt_algo='gd', learning_rate=1e-2, l2_w=0, l2_v=0,
             sync=False, workers=20):
    Model.__init__(self)
    data_file_list = tf.gfile.ListDirectory(data_dir)
    data_file_list = [x for x in data_file_list if '.tf' in x]
    data_file_list = [os.path.join(data_dir, x) for x in data_file_list]
    data_file_list.sort()
    eprint("input files:", data_file_list)
    input_files = data_file_list
    eprint("-------- create graph ----------")
    # self.graph = tf.Graph()
    # with self.graph.as_default():
    with tf.device('/cpu:0'):
        self.X = tf.sparse_placeholder(tf.float32, name='X')
        self.B = tf.sparse_placeholder(tf.float32, name='B')
        self.y = tf.placeholder(tf.float32, shape=[None], name='y')
        init_vars = [('linear', [input_dim, output_dim], 'xavier', dtype),
                     ('V', [input_dim, factor_order], 'xavier', dtype),
                     ('bias', [output_dim], 'zero', dtype)]
        self.vars = utils.init_var_map(init_vars, None)
        w = self.vars['linear']
        V = self.vars['V']
        b = self.vars['bias']
        ## linear term
        Xw = tf.sparse_tensor_dense_matmul(self.B, w)
        ## cross term
        # X^2
        X_square = tf.SparseTensor(self.X.indices, tf.square(self.X.values),
                                   tf.to_int64(tf.shape(self.X)))
        # XV, shape: input_dim*k
        XV_square = tf.square(tf.sparse_tensor_dense_matmul(self.X, V))
        # X^2 * V^2, shape: input_dim*k
        X2V2 = tf.sparse_tensor_dense_matmul(X_square, tf.square(V))
        ## normalize
        Xnorm = tf.reshape(1.0 / tf.sparse_reduce_sum(self.X, 1), [-1, output_dim])
        # 1/2 * row_sum(XV_square - X2V2), shape: input_dim*1
        p = 0.5 * Xnorm * tf.reshape(tf.reduce_sum(XV_square - X2V2, 1),
                                     [-1, output_dim])
        logits = tf.reshape(b + Xw + p, [-1])
        self.y_prob = tf.sigmoid(logits)
        self.loss = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=self.y)) + \
            l2_w * tf.nn.l2_loss(Xw)
        self.global_step = _variable_on_cpu(
            'global_step', [],
            initializer=tf.constant_initializer(0), trainable=False)
        if sync:
            self.optimizer = utils.get_sync_optimizer(opt_algo, learning_rate, workers)
        else:
            self.optimizer = utils.get_optimizer(opt_algo, learning_rate)
        self.train_op = self.optimizer.minimize(self.loss,
                                                global_step=self.global_step)
        self.summary_op = tf.summary.merge_all()

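# Hedged numeric check (not part of the original source) of the factorization-machine
# identity behind the cross term used above: for each row x,
#   sum_{i<j} <V_i, V_j> x_i x_j == 0.5 * sum_k ((x @ V)_k^2 - (x^2 @ V^2)_k),
# i.e. 0.5 * row_sum(XV_square - X2V2). The extra Xnorm factor in the models above is a
# per-row normalization applied on top of this identity. This sketch is self-contained
# and only uses numpy; the sizes are arbitrary.
import numpy as np

def fm_cross_term_check(seed=0, n=4, d=6, k=3):
    rng = np.random.RandomState(seed)
    X = rng.randn(n, d)   # n rows of d features
    V = rng.randn(d, k)   # one k-dimensional latent vector per feature
    # brute force: explicit pairwise interactions per row
    brute = np.array([
        sum(np.dot(V[i], V[j]) * x[i] * x[j]
            for i in range(d) for j in range(i + 1, d))
        for x in X
    ])
    # vectorized form matching the TF code: 0.5 * row_sum((XV)^2 - X^2 V^2)
    fast = 0.5 * np.sum(np.square(X @ V) - np.square(X) @ np.square(V), axis=1)
    return np.allclose(brute, fast)  # True
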
def inception_v1_module(feed, feed_dim=256, map_size=(128, 192, 96, 64),
                        reduce1x1_size=64, batch_norm=False):
    """
    :param feed:
    :param map_size: numbers of feature maps output by each tower
        (1x1, 3x3, 5x5, 1x1) inside the Inception module
    :param reduce1x1_size: number of feature maps output by each 1x1 convolution
        that precedes a large convolution
    :return:
    """
    def conv2d_s1(x, W):
        return conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')

    def max_pool_3x3_s1(x):
        return tf.nn.max_pool(x, ksize=[1, 3, 3, 1], strides=[1, 1, 1, 1],
                              padding='SAME')

    # follows input
    W_conv_1x1_1 = _variable_with_weight_decay(
        'W_conv_1x1_1', shape=[1, 1, feed_dim, map_size[0]], stddev=5e-2, wd=None)
    b_conv_1x1_1 = _variable_on_cpu('b_conv_1x1_1', [map_size[0]],
                                    tf.constant_initializer(0.0))
    # follows input
    W_conv_1x1_2 = _variable_with_weight_decay(
        'W_conv_1x1_2', shape=[1, 1, feed_dim, reduce1x1_size], stddev=5e-2, wd=None)
    b_conv_1x1_2 = _variable_on_cpu('b_conv_1x1_2', [reduce1x1_size],
                                    tf.constant_initializer(0.0))
    # follows input
    W_conv_1x1_3 = _variable_with_weight_decay(
        'W_conv_1x1_3', shape=[1, 1, feed_dim, reduce1x1_size], stddev=5e-2, wd=None)
    b_conv_1x1_3 = _variable_on_cpu('b_conv_1x1_3', [reduce1x1_size],
                                    tf.constant_initializer(0.0))
    # follows 1x1_2
    # attention to the shape params!!!!
    W_conv_3x3 = _variable_with_weight_decay(
        'W_conv_3x3', shape=[3, 3, reduce1x1_size, map_size[1]], stddev=5e-2, wd=None)
    b_conv_3x3 = _variable_on_cpu('b_conv_3x3', [map_size[1]],
                                  tf.constant_initializer(0.0))
    # follows 1x1_3
    W_conv_5x5 = _variable_with_weight_decay(
        'W_conv_5x5', shape=[5, 5, reduce1x1_size, map_size[2]], stddev=5e-2, wd=None)
    b_conv_5x5 = _variable_on_cpu('b_conv_5x5', [map_size[2]],
                                  tf.constant_initializer(0.0))
    # follows max pooling
    W_conv_1x1_4 = _variable_with_weight_decay(
        'W_conv_1x1_4', shape=[1, 1, feed_dim, map_size[3]], stddev=5e-2, wd=None)
    b_conv_1x1_4 = _variable_on_cpu('b_conv_1x1_4', [map_size[3]],
                                    tf.constant_initializer(0.0))
    # Inception module
    conv_1x1_1 = conv2d_s1(feed, W_conv_1x1_1) + b_conv_1x1_1
    conv_1x1_2 = tf.nn.relu(conv2d_s1(feed, W_conv_1x1_2) + b_conv_1x1_2)
    conv_1x1_3 = tf.nn.relu(conv2d_s1(feed, W_conv_1x1_3) + b_conv_1x1_3)
    conv_3x3 = conv2d_s1(conv_1x1_2, W_conv_3x3) + b_conv_3x3
    conv_5x5 = conv2d_s1(conv_1x1_3, W_conv_5x5) + b_conv_5x5
    maxpool1 = max_pool_3x3_s1(feed)
    conv_1x1_4 = conv2d_s1(maxpool1, W_conv_1x1_4) + b_conv_1x1_4
    # concatenate all the feature maps and hit them with a relu
    concat = tf.concat([conv_1x1_1, conv_3x3, conv_5x5, conv_1x1_4], 3)
    if batch_norm:
        biases = _variable_on_cpu('biases', sum(map_size),
                                  tf.constant_initializer(0.0))
        mean, variance = tf.nn.moments(concat, axes=[0])
        epsilon = 1e-5
        gamma = _variable_on_cpu('gammas', sum(map_size),
                                 tf.constant_initializer(1.0))
        pre_activation = tf.nn.batch_normalization(concat, mean, variance, biases,
                                                   gamma, epsilon)
    else:
        pre_activation = concat
    after_activation = tf.nn.relu(pre_activation, name='activated_out')
    _activation_summary(after_activation)
    return after_activation

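# Hedged usage sketch (not part of the original source): the module concatenates its
# four towers along the channel axis, so the output has sum(map_size) channels and the
# same spatial size as the input (every convolution and the 3x3 pool use stride 1 with
# SAME padding). feed_dim must match the input's channel count; the scope name and the
# numbers below are illustrative only.
def inception_block_example(feed):  # feed: [batch, h, w, 256]
    with tf.variable_scope('inception_3a'):
        out = inception_v1_module(feed, feed_dim=256,
                                  map_size=(128, 192, 96, 64),
                                  reduce1x1_size=64)
    return out  # [batch, h, w, 480], since 128 + 192 + 96 + 64 = 480
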
def conv_bn_relu(x, out_channels, ksize, stride=1, groups=1, qweight=False,
                 qactivation=False, padding='SAME', scale=None, has_bn=True,
                 has_relu=True, phase_train=False, scope=None):
    node = {'input': x, 'output': None, 'W': None, 'b': None}
    cfg_node = {
        'name': scope,
        'type': 'Conv2D',
        'out': out_channels,
        'in': 0,
        'ksize': ksize,
        'stride': stride,
        'groups': groups,
        'padding': padding,
        'active': has_relu
    }
    with tf.variable_scope(scope):
        in_channels = x.shape.as_list()[3]
        cfg_node['in'] = in_channels
        assert in_channels % groups == 0 and out_channels % groups == 0
        shape = [ksize, ksize, in_channels // groups, out_channels]
        kernel = _variable_with_weight_decay('W', shape)
        tf.add_to_collection('params', kernel)
        node['W'] = kernel
        if qweight:
            kernel = int_quantize(kernel, scale[scope]['W'], num_bits=8,
                                  phase_train=phase_train)
        if groups == 1:
            f = tf.nn.conv2d(x, kernel, [1, stride, stride, 1], padding=padding)
        else:
            if out_channels == groups and in_channels == groups:
                f = tf.nn.depthwise_conv2d(x, tf.transpose(kernel, (0, 1, 3, 2)),
                                           [1, stride, stride, 1], padding=padding)
            else:
                kernel_list = tf.split(kernel, groups, axis=3)
                x_list = tf.split(x, groups, axis=3)
                f = tf.concat([
                    tf.nn.conv2d(x_list[i], kernel_list[i], [1, stride, stride, 1],
                                 padding=padding)
                    for i in range(groups)
                ], axis=3)
        if has_bn:
            f, bn_info = batch_norm_for_conv(f, phase_train)
            _, moving_mean, moving_variance, beta, gamma = bn_info
            s = gamma / tf.sqrt(moving_variance + cfg.bn_eps)
            node['W'] = kernel * tf.reshape(s, (1, 1, 1, -1))
            node['b'] = beta - s * moving_mean
        else:
            biases = _variable_on_cpu('b', out_channels, tf.constant_initializer(0.0))
            tf.add_to_collection('params', biases)
            node['b'] = biases
            f = tf.nn.bias_add(f, biases)
        if has_relu:
            f = tf.nn.relu6(f)
        node['output'] = f
        print(scope, f.shape)
        tf.add_to_collection('nodes', node)
        tf.add_to_collection('cfg_nodes', cfg_node)
        if qactivation:
            f = int_quantize(f, scale[scope]['output'], num_bits=8,
                             phase_train=phase_train)
        return f

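# Hedged usage sketch (not part of the original source): a depthwise-separable pair
# built from conv_bn_relu, in the style of MobileNet blocks; setting groups equal to
# the input channel count routes through the depthwise branch above, and the following
# 1x1 convolution mixes channels. cfg (bn_eps, bn_momentum), the quantization scale
# dict, and batch_norm_for_conv are assumed to be configured by the surrounding project;
# the scope names are illustrative only.
def separable_block_example(x, out_channels, phase_train):
    in_channels = x.shape.as_list()[3]
    # depthwise 3x3: out_channels == groups == in_channels
    dw = conv_bn_relu(x, in_channels, ksize=3, stride=1, groups=in_channels,
                      phase_train=phase_train, scope='dw_conv')
    # pointwise 1x1: standard convolution mixing channels
    pw = conv_bn_relu(dw, out_channels, ksize=1, stride=1, groups=1,
                      phase_train=phase_train, scope='pw_conv')
    return pw
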