Example #1
def _embedding_alexnet(is_training, images, params):
    with tf.variable_scope('Siamese', 'CFCASiamese', [images], reuse=tf.AUTO_REUSE):
        with arg_scope(
                [layers.conv2d], activation_fn=tf.nn.relu):
            net = layers.conv2d(
                images, 96, [11, 11], 4, padding='VALID', scope='conv1')
            # net = layers.batch_norm(net, decay=0.9, epsilon=1e-06, is_training=is_training)
            net = layers_lib.max_pool2d(net, [3, 3], 2, scope='pool1')
            net = layers.conv2d(net, 256, [5, 5], scope='conv2')
            # net = layers.batch_norm(net, decay=0.9, epsilon=1e-06, is_training=is_training)
            net = layers_lib.max_pool2d(net, [3, 3], 2, scope='pool2')
            net = layers_lib.dropout(
                net, keep_prob=0.7, is_training=is_training)
            net = layers.conv2d(net, 384, [3, 3], scope='conv3')
            net = layers.conv2d(net, 256, [3, 3], scope='conv4')
            net = layers_lib.max_pool2d(net, [3, 3], 2, scope='pool5')
            net = layers_lib.dropout(
                net, keep_prob=0.7, is_training=is_training)
            net = layers_lib.flatten(net, scope='flatten1')
            net = layers_lib.fully_connected(net, 1024, scope='fc1',
                                             weights_regularizer=layers.l2_regularizer(0.0005))
            net = layers_lib.dropout(
                net, keep_prob=0.5, is_training=is_training)
            net = layers_lib.fully_connected(net, params.embedding_size, scope='fc2',
                                             weights_regularizer=layers.l2_regularizer(0.0005))
            return net
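Because the variable scope above is opened with reuse=tf.AUTO_REUSE, calling the function once per branch shares all conv/fc weights, which is the usual Siamese wiring. A minimal usage sketch, assuming TF 1.x with the tf.contrib imports the function relies on (layers, layers_lib, arg_scope) already in place; the input size and the params object are illustrative:

import tensorflow as tf

class HParams(object):   # hypothetical stand-in for `params`
    embedding_size = 128

left = tf.placeholder(tf.float32, [None, 227, 227, 3])    # illustrative AlexNet-sized inputs
right = tf.placeholder(tf.float32, [None, 227, 227, 3])

# Both calls enter the same 'Siamese' scope, so variables are created once and shared.
emb_left = _embedding_alexnet(True, left, HParams())
emb_right = _embedding_alexnet(True, right, HParams())
distance = tf.sqrt(tf.reduce_sum(tf.square(emb_left - emb_right), axis=1))  # e.g. L2 distance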
def slim_net_original(image, keep_prob):
    with arg_scope([layers.conv2d, layers.fully_connected], biases_initializer=tf.random_normal_initializer(stddev=0.1)):

        # conv2d(inputs, num_outputs, kernel_size, stride=1, padding='SAME',
        # activation_fn=nn.relu, normalizer_fn=None, normalizer_params=None,
        # weights_initializer=initializers.xavier_initializer(), weights_regularizer=None,
        # biases_initializer=init_ops.zeros_initializer, biases_regularizer=None, scope=None):
        net = layers.conv2d(image, 32, [5, 5], scope='conv1', weights_regularizer=regularizers.l1_regularizer(0.5))

        # max_pool(inputs, kernel_size, stride=2, padding='VALID', scope=None)
        net = layers.max_pool2d(net, 2, scope='pool1')

        net = layers.conv2d(net, 64, [5, 5], scope='conv2', weights_regularizer=regularizers.l2_regularizer(0.5))
        summaries.summarize_tensor(net, tag='conv2')

        net = layers.max_pool2d(net, 2, scope='pool2')

        net = layers.flatten(net, scope='flatten1')

        # fully_connected(inputs, num_outputs, activation_fn=nn.relu, normalizer_fn=None,
        # normalizer_params=None, weights_initializer=initializers.xavier_initializer(),
        # weights_regularizer=None, biases_initializer=init_ops.zeros_initializer,
        # biases_regularizer=None, scope=None):
        net = layers.fully_connected(net, 1024, scope='fc1')

        # dropout(inputs, keep_prob=0.5, is_training=True, scope=None)
        net = layers.dropout(net, keep_prob=keep_prob, scope='dropout1')

        net = layers.fully_connected(net, 10, scope='fc2')
    return net
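For reference, one plausible way to wire this classifier up (the 28x28 grayscale input is illustrative, and the tf.contrib imports used above, layers / regularizers / summaries / arg_scope, are assumed to be in place):

import tensorflow as tf

image = tf.placeholder(tf.float32, [None, 28, 28, 1])   # e.g. MNIST-sized input
keep_prob = tf.placeholder(tf.float32)                   # feed 0.5 for training, 1.0 for eval

logits = slim_net_original(image, keep_prob)             # [None, 10] class logits
probs = tf.nn.softmax(logits)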
Example #3
def get_slim_arch_bn(inputs, isTrainTensor, num_classes=1000, scope='vgg_16'):
    with variable_scope.variable_scope(scope, 'vgg_16', [inputs]) as sc:
        end_points_collection = sc.original_name_scope + '_end_points'
        # Collect outputs for conv2d, fully_connected and max_pool2d.

        filters = 64

        # Arg scope set default parameters for a list of ops
        with arg_scope(
            [layers.conv2d, layers_lib.fully_connected, layers_lib.max_pool2d],
                outputs_collections=end_points_collection):
            net = layers_lib.repeat(inputs,
                                    2,
                                    layers.conv2d,
                                    filters, [3, 3],
                                    scope='conv1')
            #p_0 = layers_lib.max_pool2d(net, [2, 2], scope='pool1')

            net = layers_lib.repeat(net,
                                    2,
                                    layers.conv2d,
                                    filters, [3, 3],
                                    scope='conv2')

            # net = layers_lib.repeat(net, 8, layers.conv2d, filters, [3, 3],scope='conv3')

            #last_conv = layers_lib.max_pool2d(net, [2, 2], scope='pool2')

            last_conv = net
            # Here we have 14x14 filters
            net = tf.reduce_mean(last_conv, [1, 2])  # Global average pooling

            # add layer with float 32 mask of same shape as global average pooling out
            # feed default with ones, leave placeholder

            mask = tf.placeholder_with_default(tf.ones_like(net),
                                               shape=net.shape,
                                               name='gap_mask')
            net = tf.multiply(net, mask)

            net = layers_lib.fully_connected(net,
                                             num_classes,
                                             activation_fn=None,
                                             biases_initializer=None,
                                             scope='softmax_logits')

            with tf.variable_scope("raw_CAM"):
                w_tensor_name = "vgg_16/softmax_logits/weights:0"
                s_w = tf.get_default_graph().get_tensor_by_name(w_tensor_name)
                softmax_weights = tf.expand_dims(tf.expand_dims(s_w, 0),
                                                 0)  # reshape to match 1x1xFxC
                # tensor mult from (N x lh x lw x F) , (1 x 1 x F x C)
                cam = tf.tensordot(last_conv,
                                   softmax_weights, [[3], [2]],
                                   name='cam_out')

            # Convert end_points_collection into a end_point dict.
            end_points = utils.convert_collection_to_dict(
                end_points_collection)
            return net, end_points
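A hedged wiring sketch for the function above, assuming a fresh default graph (the lookup of 'vgg_16/softmax_logits/weights:0' by name requires the scope not to be uniquified) and the contrib imports it relies on (variable_scope, layers, layers_lib, arg_scope, utils); input size and class count are illustrative:

import tensorflow as tf

images = tf.placeholder(tf.float32, [None, 56, 56, 3])
is_training = tf.placeholder_with_default(False, [])    # threaded through for batch-norm variants

logits, end_points = get_slim_arch_bn(images, is_training, num_classes=5)
# `logits` is the [None, 5] output of the global-average-pooled classifier;
# `end_points` maps the collected conv/fc/pool outputs to their tensors.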
Example #4
def get_slim_arch(inputs, num_classes=1000, scope='vgg_16'):
    """
    from vgg16 https://github.com/tensorflow/models/blob/master/research/slim/nets/vgg.py
    :param inputs:
    :param num_classes:
    :param scope:
    :return:
    """
    with variable_scope.variable_scope(scope, 'vgg_16', [inputs]) as sc:
        end_points_collection = sc.original_name_scope + '_end_points'
        # Collect outputs for conv2d, fully_connected and max_pool2d.

        # Arg scope set default parameters for a list of ops
        with arg_scope(
            [layers.conv2d, layers_lib.fully_connected, layers_lib.max_pool2d],
                outputs_collections=end_points_collection):
            net = layers_lib.repeat(inputs,
                                    2,
                                    layers.conv2d,
                                    64, [3, 3],
                                    scope='conv1')
            net = layers_lib.max_pool2d(net, [2, 2], scope='pool1')
            net = layers_lib.repeat(net,
                                    2,
                                    layers.conv2d,
                                    128, [3, 3],
                                    scope='conv2')
            net = layers_lib.max_pool2d(net, [2, 2], scope='pool2')
            net = layers_lib.repeat(net,
                                    3,
                                    layers.conv2d,
                                    256, [3, 3],
                                    scope='conv3')
            net = layers_lib.max_pool2d(net, [2, 2], scope='pool3')
            net = layers_lib.repeat(net,
                                    3,
                                    layers.conv2d,
                                    512, [3, 3],
                                    scope='conv4')
            net = layers_lib.max_pool2d(net, [2, 2], scope='pool4')
            net = layers_lib.repeat(net,
                                    3,
                                    layers.conv2d,
                                    512, [3, 3],
                                    scope='conv5')

            # Here we have 14x14 filters
            net = tf.reduce_mean(net, [1, 2])  # Global average pooling
            net = layers_lib.fully_connected(net,
                                             num_classes,
                                             activation_fn=None,
                                             biases_initializer=None,
                                             scope='softmax_logits')

            # Convert end_points_collection into a end_point dict.
            end_points = utils.convert_collection_to_dict(
                end_points_collection)
            return net, end_points
Example #5
def get_conditional_batch_norm_param(conditional_layer,
                                     output_dim,
                                     scope='gamma',
                                     activation_fn=None):
    """Outputs the batch norm parameter transformed from the `conditional_layer` using a fully connected layer."""
    if conditional_layer is None:
        raise ValueError('`conditional_layer` must not be None.')
    return layers.fully_connected(conditional_layer,
                                  output_dim,
                                  scope=scope,
                                  activation_fn=activation_fn)
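For illustration, conditional scale and shift parameters might be produced like this (the conditioning vector and channel count are arbitrary, and the layers import used above is assumed to be tf.contrib.layers):

import tensorflow as tf

condition = tf.placeholder(tf.float32, [None, 16])       # e.g. a class or noise embedding
gamma = get_conditional_batch_norm_param(condition, output_dim=64, scope='gamma')
beta = get_conditional_batch_norm_param(condition, output_dim=64, scope='beta')
# gamma/beta are [None, 64]; reshape to [None, 1, 1, 64] to scale and shift an
# NHWC feature map with 64 channels for conditional batch normalization.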
Example #7
def _embedding_2logits(is_training, embeddings, labels):
    """embeddings to 2 logits and losss"""
    logits = layers_lib.fully_connected(
        embeddings, 2, scope='fc3', reuse=tf.AUTO_REUSE)
    logits_array = tf.split(logits, 2, 1)
    logits_diff = tf.subtract(logits_array[0], logits_array[1])

    if labels is not None:
        loss = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=tf.cast(labels, tf.int64)))
        return loss, logits_diff
    else:
        return None, logits_diff
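A minimal call sketch (the embedding width is illustrative, and layers_lib above is assumed to be tf.contrib.layers); since the score is the class-0 logit minus the class-1 logit, thresholding it at zero recovers the predicted pair label:

import tensorflow as tf

embeddings = tf.placeholder(tf.float32, [None, 128])
labels = tf.placeholder(tf.int32, [None])                # 0/1 same-or-different pair labels

loss, score = _embedding_2logits(True, embeddings, labels)
prediction = tf.cast(tf.squeeze(score, [1]) < 0, tf.int64)   # 1 when the class-1 logit wins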
Example #8
def _loss_2channels_softmax_alex(images, labels, params, is_training):
    # params.embedding_size = 2
    embeddings = _embedding_alexnet(is_training, images, params)
    logits = layers_lib.fully_connected(
        embeddings, 2, scope='fc3', reuse=tf.AUTO_REUSE)
    # logits = embeddings
    logits_array = tf.split(logits, 2, 1)
    logits_diff = tf.subtract(logits_array[0], logits_array[1])

    if labels is not None:
        loss = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=tf.cast(labels, tf.int64)))
        return loss, logits_diff
    else:
        return None, logits_diff
Example #9
def main(_):

    dropout_on = tf.placeholder(tf.float32)
    if dropout_on is not None:
        conv_keep_prob = 1.0
    else:
        conv_keep_prob = 1.0

    x = tf.placeholder(tf.float32, shape=[None, 14 * 4])
    y_ = tf.placeholder(tf.float32, shape=[None, 2])

    x_image = tf.reshape(x, [-1, 14, 4, 1])

    n_conv1 = 384  # TBD
    L_conv1 = 9  # TBD
    maxpool_len1 = 2
    conv1 = convolution2d(x_image,
                          n_conv1, [L_conv1, 4],
                          padding="VALID",
                          normalizer_fn=None)
    conv1_pool = max_pool2d(conv1, [maxpool_len1, 1], [maxpool_len1, 1])
    conv1_pool_len = int((14 - L_conv1 + 1) / maxpool_len1)

    n_conv2 = n_conv1
    L_conv2 = 5
    maxpool_len2 = int(
        conv1_pool_len - L_conv2 +
        1)  # global maxpooling (max-pool across temporal domain)
    conv2 = convolution2d(conv1_pool,
                          n_conv2, [L_conv2, 1],
                          padding='VALID',
                          normalizer_fn=None)
    conv2_pool = max_pool2d(conv2, [maxpool_len2, 1], [maxpool_len2, 1])
    # conv2_drop = tf.nn.dropout(conv2_pool, conv_keep_prob)

    # LINEAR FC LAYER
    y_conv = fully_connected(flatten(conv2_pool), 2, activation_fn=None)
    y_conv_softmax = tf.nn.softmax(y_conv)

    cross_entropy = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=y_conv, labels=y_))
    train_step = tf.train.AdamOptimizer().minimize(cross_entropy)
    correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    sess.run(tf.initialize_all_variables())
Example #10
def transform_to_covariance_matrices(input_vectors, matrix_size):
  """Construct covariance matrices via transformations from input_vectors.

  Args:
    input_vectors: A [batch size x input size] batch of vectors to transform.
    matrix_size: An integer indicating one dimension of the (square) output
        matrix.
  Returns:
    A [batch size x matrix_size x matrix_size] batch of covariance matrices.
  """
  combined_values = layers.fully_connected(
      input_vectors, matrix_size**2 + 2, activation_fn=None)
  return sign_magnitude_positive_definite(
      raw=array_ops.reshape(combined_values[..., :-2],
                            array_ops.concat([
                                array_ops.shape(combined_values)[:-1],
                                [matrix_size, matrix_size]
                            ], 0)),
      off_diagonal_scale=combined_values[..., -2],
      overall_scale=combined_values[..., -1])
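An illustrative call, assuming the TF 1.x contrib.timeseries context where this helper and sign_magnitude_positive_definite are defined (batch and matrix sizes are arbitrary):

import tensorflow as tf

vectors = tf.random_normal([8, 5])                       # a batch of 8 five-dimensional inputs
covariances = transform_to_covariance_matrices(vectors, matrix_size=3)
# covariances has shape [8, 3, 3]; each slice is a positive-definite matrix
# derived from the fully connected transform above.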
Example #12
    def __init__(self, sequence_length, num_classes):

        #placeholders for input, output and dropout
        self.input_x = tf.placeholder(tf.float32, [None, sequence_length],
                                      name="input_x")
        self.input_y = tf.placeholder(tf.float32, [None, num_classes],
                                      name="input_y")
        self.dropout_keep_prob = tf.placeholder(tf.float32,
                                                name="dropout_keep_prob")

        x_image = tf.reshape(self.input_x, shape=[-1, 14, 4, 1])

        n_conv1 = 44
        L_conv1 = 5
        maxpool_len1 = 2
        conv1 = convolution2d(x_image,
                              n_conv1, [L_conv1, 4],
                              padding='VALID',
                              normalizer_fn=None)
        conv1_pool = max_pool2d(conv1, [maxpool_len1, 1], [maxpool_len1, 1])
        conv1_pool_len = int((14 - L_conv1 + 1) / maxpool_len1)

        # n_conv2 = n_conv1
        # L_conv2 = 3
        # maxpool_len2 = int(conv1_pool_len - L_conv2 + 1)  # global maxpooling (max-pool across temporal domain)
        # conv2 = convolution2d(conv1_pool, n_conv2, [L_conv2, 1], padding='VALID', normalizer_fn=None)
        # conv2_pool = max_pool2d(conv2, [maxpool_len2, 1], [maxpool_len2, 1])

        # LINEAR FC LAYER
        y_conv = fully_connected(flatten(conv1_pool), 2, activation_fn=None)
        prediction = tf.nn.softmax(y_conv)

        self.cross_entropy = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=y_conv,
                                                    labels=self.input_y))
        # train_step = tf.train.AdamOptimizer().minimize(cross_entropy)

        correct_prediction = tf.equal(tf.argmax(prediction, 1),
                                      tf.argmax(self.input_y, 1))
        self.accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    def forward_conv(self, inp, reuse=False, scope=''):
        channels = 3
        # inp = tf.reshape(inp, [-1, img_size, img_size, channels])
        hidden1 = self.conv_block(inp,
                                  reuse,
                                  scope + '0',
                                  max_pool=self.max_pool)
        hidden2 = self.conv_block(hidden1,
                                  reuse,
                                  scope + '1',
                                  max_pool=self.max_pool)
        hidden3 = self.conv_block(hidden2,
                                  reuse,
                                  scope + '2',
                                  max_pool=self.max_pool)
        hidden4 = self.conv_block(hidden3,
                                  reuse,
                                  scope + '3',
                                  max_pool=self.max_pool)
        hidden4 = tf.reduce_mean(hidden4, [1, 2])
        output = tf_layers.fully_connected(hidden4, self.dim_output, None)
        # return tf.matmul(hidden4, weights['w5']) + weights['b5']
        return output
Example #14
def _loss_siamese(inputs, labels, params):
    inputs = tf.split(inputs, 2, 2)
    input0 = inputs[0]
    input1 = inputs[1]

    out0, state0 = _rnn(input0, 0.8, "side", 46)
    out1, state1 = _rnn(input1, 0.8, "side", 46)

    out = tf.concat([out0, out1], 2)
    out, state = _rnn(out, 0.8, "concat", 23)

    out = tf.sigmoid(state[-1])
    logits = layers_lib.fully_connected(
        out, 2, scope='fc1', reuse=tf.AUTO_REUSE)

    logits_array = tf.split(logits, 2, 1)
    logits_diff = tf.subtract(logits_array[0], logits_array[1])
    if labels is not None:
        loss = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=tf.cast(labels, tf.int64)))
        return loss, logits_diff
    else:
        return None, logits_diff
    def build_layer_fn(x, w_initializer, b_initializer):
      var_collection = {
          'weights': ['CONTRIB_LAYERS_FC_WEIGHTS'],
          'biases': ['CONTRIB_LAYERS_FC_BIASES']
      }
      x = contrib_layers.flatten(x)
      net = contrib_layers.fully_connected(
          x,
          3,
          weights_initializer=w_initializer,
          biases_initializer=b_initializer,
          variables_collections=var_collection)
      weight_vars = ops.get_collection('CONTRIB_LAYERS_FC_WEIGHTS')
      self.assertEquals(1, len(weight_vars))
      bias_vars = ops.get_collection('CONTRIB_LAYERS_FC_BIASES')
      self.assertEquals(1, len(bias_vars))
      expected_normalized_vars = {
          'contrib.layers.fully_connected.weights': weight_vars[0]
      }
      expected_not_normalized_vars = {
          'contrib.layers.fully_connected.bias': bias_vars[0]
      }

      return net, expected_normalized_vars, expected_not_normalized_vars
Example #17
def batchnorm_classifier(inputs):
  inputs = layers.batch_norm(inputs, decay=0.1)
  return layers.fully_connected(inputs, 1, activation_fn=math_ops.sigmoid)
Example #18
@author: Zhukun Luo
Jiangxi University of Finance and Economics
'''
from tensorflow.contrib.layers.python.layers.layers import fully_connected
'''
A recurrent neural network helps the current decision by keeping historical information.
LSTMs are mainly used to address the long-term dependency problem.
Unlike the single tanh recurrent cell, an LSTM has three "gate" structures.
A "gate" is a sigmoid network combined with an element-wise multiplication; since the sigmoid outputs values in (0, 1), it acts as a gate on the information flow.
For the RNN to keep long-term memory effectively, the "forget gate" and the "input gate" are essential.
Forget gate: f = sigmoid(W1 * x + W2 * h)
After the RNN forgets part of its previous state, it still needs to take in new memory from the current input; that is what the input gate does.
'''
# Define an LSTM cell. In TensorFlow a complete LSTM structure is created with a single call;
# the variables used inside the LSTM are declared automatically by the cell.
# (lstm_hidden_size, batch_size, num_steps, current_input, expect_output and calc_loss are
#  assumed to be defined elsewhere; this is tutorial-style pseudocode.)
import tensorflow as tf
lstm = tf.nn.rnn_cell.BasicLSTMCell(lstm_hidden_size)
# Initialize the LSTM state to all zeros. BasicLSTMCell provides zero_state() for this.
# `state` is an LSTMStateTuple of two tensors: state.c and state.h hold the c and h states.
state = lstm.zero_state(batch_size, dtype=tf.float32)
# Define the loss
loss = 0.0
# Although at test time an RNN can process sequences of arbitrary length, to unroll the
# recurrent network into a feed-forward network during training we need to know the sequence
# length of the training data; num_steps denotes that length.
# dynamic_rnn is the dynamic way to handle variable-length sequences.
for i in range(num_steps):
    # Declare the LSTM variables at the first time step; every later time step reuses them.
    if i > 0:
        tf.get_variable_scope().reuse_variables()
    # Each step handles one time step of the sequence: feeding the current input current_input
    # and the previous state into the LSTM cell yields the output lstm_output and the updated state.
    lstm_output, state = lstm(current_input, state)
    final_output = fully_connected(lstm_output, output_size)  # output_size: desired width, assumed defined elsewhere
    loss += calc_loss(final_output, expect_output)
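The comments above mention dynamic_rnn as the dynamic way to handle variable-length sequences instead of unrolling the loop by hand; a minimal sketch (all sizes are illustrative):

import tensorflow as tf

lstm_hidden_size, batch_size, num_steps, input_dim = 128, 32, 20, 10

inputs = tf.placeholder(tf.float32, [batch_size, num_steps, input_dim])
cell = tf.nn.rnn_cell.BasicLSTMCell(lstm_hidden_size)
initial_state = cell.zero_state(batch_size, dtype=tf.float32)

# outputs: [batch_size, num_steps, lstm_hidden_size]; state: the final LSTMStateTuple
outputs, state = tf.nn.dynamic_rnn(cell, inputs, initial_state=initial_state)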
Example #19
def logistic_classifier(inputs):
  return layers.fully_connected(inputs, 1, activation_fn=math_ops.sigmoid)
Example #20
def main(_):
    global _train_epochs_completed
    global _validation_epochs_completed
    global _test_epochs_completed
    global _datasets
    global _validation_size
    global _test_labels

    dropout_on = tf.placeholder(tf.float32)
    if dropout_on is not None:
        conv_keep_prob = 1.0
    else:
        conv_keep_prob = 1.0

    file_name = 'out_' + str(int(time.time())) + '.csv'
    f = open(file_name, 'w')  # clear file
    f.write('dataset_num,dataset_name,roc_auc\n')
    f.close()
    for dataset_num in range(0, len(_datasets)):
        load_ENCODE_k562_dataset(dataset_num)

        x = tf.placeholder(tf.float32, shape=[None, 101 * 4])
        y_ = tf.placeholder(tf.float32, shape=[None, 2])

        # Create the model
        x_image = tf.reshape(x, [-1, 101, 4, 1])

        # CONVOLUTIONAL LAYER(S)
        n_conv3 = 64
        L_conv3 = 9
        maxpool_len3 = int(101 - L_conv3 +
                           1)  # global maxpooling ("across temporal domain")
        conv3 = convolution2d(x_image,
                              n_conv3, [L_conv3, 4],
                              padding='VALID',
                              normalizer_fn=None)
        conv3_pool = max_pool2d(conv3, [maxpool_len3, 1], [maxpool_len3, 1])

        # LINEAR FC LAYER
        y_conv = fully_connected(flatten(conv3_pool), 2, activation_fn=None)
        y_conv_softmax = tf.nn.softmax(y_conv)

        cross_entropy = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=y_conv, labels=y_))
        train_step = tf.train.AdamOptimizer().minimize(cross_entropy)
        correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
        sess.run(tf.initialize_all_variables())

        i = 0
        prev_auc = 0.0001  # small value to prevent DIV0
        stop_condition = None
        t0 = time.time()
        while stop_condition is None:
            if i % 1000 == 0:
                #t0 = time.time()
                pred_validation_labels = None
                true_validation_labels = None
                prev_validation_epochs_completed = _validation_epochs_completed
                while _validation_epochs_completed - prev_validation_epochs_completed == 0:  # do in mini batches because single GTX970 has insufficient memory to test all at once
                    if _validation_size > 1024 * 5:
                        validation_batch = get_next_batch(1, 1024)
                    else:
                        validation_batch = get_next_batch(1, 64)
                    if pred_validation_labels is None:
                        pred_validation_labels = y_conv_softmax.eval(
                            feed_dict={
                                x: validation_batch[0],
                                y_: validation_batch[1]
                            })
                        true_validation_labels = validation_batch[1]
                    else:
                        pred_validation_labels = numpy.vstack([
                            pred_validation_labels,
                            y_conv_softmax.eval(feed_dict={
                                x: validation_batch[0],
                                y_: validation_batch[1]
                            })
                        ])
                        true_validation_labels = numpy.vstack(
                            [true_validation_labels, validation_batch[1]])
                fpr, tpr, _ = roc_curve(true_validation_labels[:, 0],
                                        pred_validation_labels[:, 0])
                roc_auc = auc(fpr, tpr)
                #check stop condition:
                perc_chg_auc = (roc_auc - prev_auc) / prev_auc
                #if perc_chg_auc < 0.005: # stop when auc moving average on validation set changes by <0.5%
                #    stop_condition = 1
                prev_auc = roc_auc
                print(
                    "%s, dataset %g, epoch %d, step %d, time elapsed %g, validation roc auc %g, perc chg in auc %g"
                    % (_datasets[dataset_num], dataset_num,
                       _train_epochs_completed, i, time.time() - t0, roc_auc,
                       perc_chg_auc))
                t0 = time.time()
            batch = get_next_batch(0)
            train_step.run(feed_dict={
                x: batch[0],
                y_: batch[1],
                dropout_on: 1
            })
            if i == 7000:
                stop_condition = 1
            i += 1

        pred_test_labels = None
        true_test_labels = None
        while _test_epochs_completed == 0:  # do testing in mini batches because single GTX970 has insufficient memory to test all at once
            test_batch = get_next_batch(2, 64)
            if pred_test_labels is None:
                pred_test_labels = y_conv_softmax.eval(feed_dict={
                    x: test_batch[0],
                    y_: test_batch[1]
                })
                true_test_labels = test_batch[1]
            else:
                pred_test_labels = numpy.vstack([
                    pred_test_labels,
                    y_conv_softmax.eval(feed_dict={
                        x: test_batch[0],
                        y_: test_batch[1]
                    })
                ])
                true_test_labels = numpy.vstack(
                    [true_test_labels, test_batch[1]])
        fpr, tpr, _ = roc_curve(true_test_labels[:, 0], pred_test_labels[:, 0])
        roc_auc = auc(fpr, tpr)
        print("%s, dataset %g, final test roc auc %g" %
              (_datasets[dataset_num], dataset_num, roc_auc))
        f = open(file_name, 'a')
        f.write(
            str(dataset_num) + ',' + _datasets[dataset_num] + ',' +
            str(roc_auc) + '\n')
        f.close()
Example #21
def main(_):
    global _train_epochs_completed
    global _validation_epochs_completed
    global _test_epochs_completed
    global _datasets
    global _validation_size
    global _test_labels

    file_name = 'out_' + str(int(time.time())) + '.csv'
    f = open(file_name, 'w')  # clear file
    f.write('dataset_num,dataset_name,roc_auc\n')
    f.close()
    _datasets = utils.remove_non_existing_datafiles(_datasets)
    for dataset_num in range(0, len(_datasets)):
        load_ENCODE_k562_dataset(dataset_num)

        x = tf.placeholder(tf.float32, shape=[None, 101 * 4])
        y_ = tf.placeholder(tf.float32, shape=[None, 2])
        conv_keep_prob = tf.placeholder(tf.float32)

        # Create the model
        x_image = tf.reshape(x, [-1, 101, 4, 1])

        # CONVOLUTIONAL LAYER(S)
        n_conv1 = 64
        L_conv1 = 9
        maxpool_len1 = 2
        conv1 = convolution2d(x_image,
                              n_conv1, [L_conv1, 4],
                              padding='VALID',
                              normalizer_fn=None)
        conv1_pool = max_pool2d(conv1, [maxpool_len1, 1], [maxpool_len1, 1])
        conv1_drop = tf.nn.dropout(conv1_pool, conv_keep_prob)
        conv1_pool_len = int((101 - L_conv1 + 1) / maxpool_len1)

        n_conv2 = n_conv1
        L_conv2 = 5
        maxpool_len2 = int(
            conv1_pool_len - L_conv2 +
            1)  # global maxpooling (max-pool across temporal domain)
        conv2 = convolution2d(conv1_drop,
                              n_conv2, [L_conv2, 1],
                              padding='VALID',
                              normalizer_fn=None)
        conv2_pool = max_pool2d(conv2, [maxpool_len2, 1], [maxpool_len2, 1])
        conv2_drop = tf.nn.dropout(conv2_pool, conv_keep_prob)

        # LINEAR FC LAYER
        y_conv = fully_connected(flatten(conv2_drop), 2, activation_fn=None)
        y_conv_softmax = tf.nn.softmax(y_conv)

        cross_entropy = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=y_conv, labels=y_))
        train_step = tf.train.AdamOptimizer().minimize(cross_entropy)
        correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
        sess.run(tf.initialize_all_variables())

        i = 0
        prev_auc = 0.0001  # small value to prevent DIV0
        prev_train_epochs_compl = 0
        stop_condition = None
        t0 = time.time()
        this_conv_keep_prob = 0.5
        final_keep_prob = 0.75
        while stop_condition is None:
            if i % 1000 == 0:
                #t0 = time.time()
                pred_validation_labels = None
                true_validation_labels = None
                prev_validation_epochs_completed = _validation_epochs_completed
                while _validation_epochs_completed - prev_validation_epochs_completed == 0:  # do in mini batches because single GTX970 has insufficient memory to test all at once
                    if _validation_size > 1024 * 5:
                        validation_batch = get_next_batch(1, 1024)
                    else:
                        validation_batch = get_next_batch(1, 64)
                    if pred_validation_labels is None:
                        pred_validation_labels = y_conv_softmax.eval(
                            feed_dict={
                                x: validation_batch[0],
                                y_: validation_batch[1],
                                conv_keep_prob: 1.0
                            })
                        true_validation_labels = validation_batch[1]
                    else:
                        pred_validation_labels = numpy.vstack([
                            pred_validation_labels,
                            y_conv_softmax.eval(
                                feed_dict={
                                    x: validation_batch[0],
                                    y_: validation_batch[1],
                                    conv_keep_prob: 1.0
                                })
                        ])
                        true_validation_labels = numpy.vstack(
                            [true_validation_labels, validation_batch[1]])
                fpr, tpr, _ = roc_curve(true_validation_labels[:, 0],
                                        pred_validation_labels[:, 0])
                roc_auc = auc(fpr, tpr)
                perc_chg_auc = (roc_auc - prev_auc) / prev_auc
                print(
                    "%s, dataset %g, epoch %d, step %d, time elapsed %g, validation roc auc %g, perc chg in auc %g, conv_keep_prob %g"
                    % (_datasets[dataset_num], dataset_num,
                       _train_epochs_completed, i, time.time() - t0, roc_auc,
                       perc_chg_auc, this_conv_keep_prob))
                #check stop condition:
                if i == 3000 or i == 10000 or i == 20000:  # increase keep_prob at these iteration numbers (not epochs)
                    if this_conv_keep_prob < final_keep_prob:
                        this_conv_keep_prob += 0.2
                        if this_conv_keep_prob > final_keep_prob:
                            this_conv_keep_prob = final_keep_prob
                    else:
                        stop_condition = 1
                prev_train_epochs_compl = _train_epochs_completed
                prev_auc = roc_auc
                t0 = time.time()
            batch = get_next_batch(0)
            train_step.run(feed_dict={
                x: batch[0],
                y_: batch[1],
                conv_keep_prob: this_conv_keep_prob
            })
            i += 1

        pred_test_labels = None
        true_test_labels = None
        while _test_epochs_completed == 0:  # do testing in mini batches because single GTX970 has insufficient memory to test all at once
            test_batch = get_next_batch(2, 64)
            if pred_test_labels is None:
                pred_test_labels = y_conv_softmax.eval(feed_dict={
                    x: test_batch[0],
                    y_: test_batch[1],
                    conv_keep_prob: 1.0
                })
                true_test_labels = test_batch[1]
            else:
                pred_test_labels = numpy.vstack([
                    pred_test_labels,
                    y_conv_softmax.eval(feed_dict={
                        x: test_batch[0],
                        y_: test_batch[1],
                        conv_keep_prob: 1.0
                    })
                ])
                true_test_labels = numpy.vstack(
                    [true_test_labels, test_batch[1]])
        fpr, tpr, _ = roc_curve(true_test_labels[:, 0], pred_test_labels[:, 0])
        roc_auc = auc(fpr, tpr)
        print("%s, dataset %g, final test roc auc %g" %
              (_datasets[dataset_num], dataset_num, roc_auc))
        f = open(file_name, 'a')
        f.write(
            str(dataset_num) + ',' + _datasets[dataset_num] + ',' +
            str(roc_auc) + '\n')
        f.close()
def get_slim_arch_bn(inputs, isTrainTensor, num_classes=1000, scope='vgg_16'):
    with variable_scope.variable_scope(scope, 'vgg_16', [inputs]) as sc:
        end_points_collection = sc.original_name_scope + '_end_points'
        # Collect outputs for conv2d, fully_connected and max_pool2d.

        filters = 64

        # Arg scope set default parameters for a list of ops
        with arg_scope(
            [layers.conv2d, layers_lib.fully_connected, layers_lib.max_pool2d],
                outputs_collections=end_points_collection):
            net = layers_lib.repeat(
                inputs,
                2,
                layers.conv2d,
                filters, [3, 3],
                scope='conv1',
                weights_regularizer=slim.l2_regularizer(0.01))
            bn_0 = tf.contrib.layers.batch_norm(net,
                                                center=True,
                                                scale=True,
                                                is_training=isTrainTensor,
                                                scope='bn1',
                                                decay=0.9)
            p_0 = layers_lib.max_pool2d(bn_0, [2, 2], scope='pool1')

            net = layers_lib.repeat(
                p_0,
                2,
                layers.conv2d,
                filters, [3, 3],
                scope='conv2',
                weights_regularizer=slim.l2_regularizer(0.01))
            bn_1 = tf.contrib.layers.batch_norm(net,
                                                center=True,
                                                scale=True,
                                                is_training=isTrainTensor,
                                                scope='bn2',
                                                decay=0.9)
            res_1 = p_0 + bn_1
            p_1 = layers_lib.max_pool2d(res_1, [2, 2], scope='pool2')

            net = layers_lib.repeat(
                p_1,
                3,
                layers.conv2d,
                filters, [4, 4],
                scope='conv3',
                weights_regularizer=slim.l2_regularizer(0.01))
            bn_2 = tf.contrib.layers.batch_norm(net,
                                                center=True,
                                                scale=True,
                                                is_training=isTrainTensor,
                                                scope='bn3',
                                                decay=0.9)
            res_2 = p_1 + bn_2
            p_2 = layers_lib.max_pool2d(res_2, [2, 2], scope='pool3')

            net = layers_lib.repeat(
                p_2,
                3,
                layers.conv2d,
                filters, [5, 5],
                scope='conv4',
                weights_regularizer=slim.l2_regularizer(0.01))
            bn_3 = tf.contrib.layers.batch_norm(net,
                                                center=True,
                                                scale=True,
                                                is_training=isTrainTensor,
                                                scope='bn4',
                                                decay=0.9)
            res_3 = p_2 + bn_3
            p_3 = layers_lib.max_pool2d(res_3, [2, 2], scope='pool4')

            net = layers_lib.repeat(
                p_3,
                3,
                layers.conv2d,
                filters, [5, 5],
                scope='conv5',
                weights_regularizer=slim.l2_regularizer(0.01))

            # Here we have 14x14 filters
            net = tf.reduce_mean(net, [1, 2])  # Global average pooling

            # add layer with float 32 mask of same shape as global average pooling out
            # feed default with ones, leave placeholder

            mask = tf.placeholder_with_default(tf.ones_like(net),
                                               shape=net.shape,
                                               name='gap_mask')
            net = tf.multiply(net, mask)

            net = layers_lib.fully_connected(net,
                                             num_classes,
                                             activation_fn=None,
                                             biases_initializer=None,
                                             scope='softmax_logits')

            # Convert end_points_collection into a end_point dict.
            end_points = utils.convert_collection_to_dict(
                end_points_collection)
            return net, end_points
Example #23
def BatchNormClassifier(inputs):
  inputs = layers.batch_norm(inputs, decay=0.1, fused=True)
  return layers.fully_connected(inputs, 1, activation_fn=math_ops.sigmoid)
Example #24
def main(_):
    global _train_epochs_completed
    global _validation_epochs_completed
    global _test_epochs_completed
    global _datasets
    global _validation_size
    global _test_labels

    dropout_on = tf.placeholder(tf.float32)
    if dropout_on is not None:
        rnn_keep_prob = 1.0
    else:
        rnn_keep_prob = 1.0

    file_name = 'out_' + str(int(time.time())) + '.csv'
    f=open(file_name,'w') # clear file
    f.write('dataset_num,motif_discovery=0|motif_occupancy=1,dataset_name,roc_auc,prc_auc,time(sec)\n')
    f.close()
    for dataset_num in range(106, len(_datasets)):
        for motif_occ in range(0,2):
            success = False
            try:
                load_ENCODE_k562_dataset(dataset_num,motif_occ)
                success = True
            except:
                print('Hmm.. Something happened. Skipping dataset ' + _datasets[dataset_num])
            if success:
                with tf.variable_scope('scopename_' + str(dataset_num) + '_' + str(motif_occ)):
                    # LSTM Parameters ============================
                    lstm_n_hidden = 32 # hidden layer num features
                    # ============================================

                    x = tf.placeholder(tf.float32, shape=[None, 101*4])
                    y_ = tf.placeholder(tf.float32, shape=[None, 2])

                    # Create the model
                    x_image = tf.reshape(x, [-1,101,4,1])

                    # CONVOLUTIONAL LAYER(S)
                    n_conv1 = 128
                    L_conv1 = 9
                    n_steps1 = (101-L_conv1+1)
                    conv1 = convolution2d(x_image, n_conv1, [L_conv1,4], padding='VALID', normalizer_fn=None)
                    conv1_resh = tf.reshape(conv1, [-1,n_steps1,n_conv1])

                    # LSTM LAYER(S)
                    conv1_unpacked = tf.unpack(conv1_resh, axis=1) # this func does it all for us :)
                    lstm_fw_cell = rnn_cell.BasicLSTMCell(lstm_n_hidden)
                    lstm_bw_cell = rnn_cell.BasicLSTMCell(lstm_n_hidden)
                    birnn_out,_,_ = tf.nn.bidirectional_rnn(lstm_fw_cell, lstm_bw_cell, conv1_unpacked, dtype=tf.float32)
                    
                    # Linear activation
                    # rnn_out = birnn_out[-1] # to use LAST of the rnn inner loops (as in the MNIST example)
                    rnn_out = tf.div(tf.add_n(birnn_out), 101) # to use the AVERAGE of the rnn inner loops
                    rnn_out_drop = tf.nn.dropout(rnn_out, rnn_keep_prob) # apply dropout to regularize the LSTM
                    pred = fully_connected(flatten(rnn_out_drop), 2, activation_fn=None)
                    pred_softmax = tf.nn.softmax(pred)

                    cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y_))
                    train_step = tf.train.AdamOptimizer().minimize(cross_entropy)
                    sess.run(tf.initialize_all_variables())
                    
                    i = 0
                    prev_auc = 0.0001 # small value to prevent DIV0
                    stop_condition = None
                    t0 = time.time()
                    while stop_condition is None:
                        #if i%100 == 0:
                        if 1 == 0: # turned off
                            #t0 = time.time()
                            pred_validation_labels = None
                            true_validation_labels = None
                            prev_validation_epochs_completed = _validation_epochs_completed
                            while _validation_epochs_completed - prev_validation_epochs_completed == 0: # do in mini batches because single GTX970 has insufficient memory to test all at once
                                if _validation_size > 1024*5:
                                    validation_batch = get_next_batch(1,1024)
                                else:
                                    validation_batch = get_next_batch(1,64)
                                if pred_validation_labels is None:
                                    pred_validation_labels = pred_softmax.eval(feed_dict={x: validation_batch[0], y_: validation_batch[1]})
                                    true_validation_labels = validation_batch[1]
                                else:
                                    pred_validation_labels = numpy.vstack([pred_validation_labels, pred_softmax.eval(feed_dict={x: validation_batch[0], y_: validation_batch[1]})])
                                    true_validation_labels = numpy.vstack([true_validation_labels, validation_batch[1]])
                            fpr, tpr, _ = roc_curve(true_validation_labels[:,0], pred_validation_labels[:,0])
                            roc_auc = auc(fpr, tpr)
                            #check stop condition:
                            perc_chg_auc = (roc_auc - prev_auc) / prev_auc
                            #if perc_chg_auc < 0.005: # stop when auc moving average on validation set changes by <0.5%
                            #    stop_condition = 1
                            prev_auc = roc_auc
                            print("%s, dataset %g, epoch %d, step %d, time elapsed %g, validation roc auc %g, perc chg in auc %g"%(_datasets[dataset_num], dataset_num, _train_epochs_completed, i, time.time()-t0, roc_auc, perc_chg_auc))
                            t0 = time.time()
                        batch = get_next_batch(0)
                        train_step.run(feed_dict={x: batch[0], y_: batch[1], dropout_on: 1})
                        if i == 4800:
                            stop_condition = 1
                        i += 1

                    pred_test_labels = None
                    true_test_labels = None
                    while _test_epochs_completed == 0: # do testing in mini batches because single GTX970 has insufficient memory to test all at once
                        test_batch = get_next_batch(2, 64)
                        if pred_test_labels is None:
                            pred_test_labels = pred_softmax.eval(feed_dict={x: test_batch[0], y_: test_batch[1]})
                            true_test_labels = test_batch[1]
                        else:
                            pred_test_labels = numpy.vstack([pred_test_labels, pred_softmax.eval(feed_dict={x: test_batch[0], y_: test_batch[1]})])
                            true_test_labels = numpy.vstack([true_test_labels, test_batch[1]])
                    fpr, tpr, _ = roc_curve(true_test_labels[:,0], pred_test_labels[:,0]) # get receiver operating characteristics
                    precision, recall, _ = precision_recall_curve(true_test_labels[:,0], pred_test_labels[:,0]) # get precision recall curve
                    roc_auc = auc(fpr, tpr)
                    prc_auc = auc(recall, precision)
                    print("%s, dataset %g, final test roc auc %g, final test prc auc %g, time elapsed %g seconds"%(_datasets[dataset_num], dataset_num, roc_auc, prc_auc, time.time()-t0))
                    f=open(file_name,'a')
                    f.write(str(dataset_num) + ',' + str(motif_occ) + ',' + _datasets[dataset_num] + ',' + str(roc_auc) + ',' + str(prc_auc) + ',' + str(time.time()-t0) + '\n')
                    f.close()
                    t0 = time.time()
Example #25
def main(_):
    global _train_epochs_completed
    global _validation_epochs_completed
    global _test_epochs_completed
    global _datasets
    global _validation_size
    global _test_labels

    dropout_on = tf.placeholder(tf.float32)
    if dropout_on is not None:
        conv_keep_prob = 1.0
    else:
        conv_keep_prob = 1.0

    file_name = 'out_' + str(int(time.time())) + '.csv'
    f = open(file_name, 'w')  # clear file
    f.write(
        'dataset_num,motif_discovery=0|motif_occupancy=1,dataset_name,roc_auc,prc_auc,time(sec)\n'
    )
    f.close()
    for dataset_num in range(0, len(_datasets)):
        for motif_occ in range(0, 2):
            success = False
            try:
                load_ENCODE_k562_dataset(dataset_num, motif_occ)
                success = True
            except:
                print('Hmm.. Something happened. Skipping dataset ' +
                      _datasets[dataset_num])
            if success:
                with tf.variable_scope('scopename_' + str(dataset_num) + '_' +
                                       str(motif_occ)):
                    # LSTM Parameters ============================
                    lstm_n_hidden = 32  # hidden layer num features
                    # ============================================

                    x = tf.placeholder(tf.float32, shape=[None, 101 * 4])
                    y_ = tf.placeholder(tf.float32, shape=[None, 2])

                    # Create the model
                    x_image = tf.reshape(x, [-1, 101, 4, 1])

                    # CONVOLUTIONAL LAYER(S)
                    n_conv1 = 384
                    L_conv1 = 9
                    maxpool_len1 = 2
                    conv1 = convolution2d(x_image,
                                          n_conv1, [L_conv1, 4],
                                          padding='VALID',
                                          normalizer_fn=None)
                    conv1_pool = max_pool2d(conv1, [maxpool_len1, 1],
                                            [maxpool_len1, 1])
                    #conv1_drop = tf.nn.dropout(conv1_pool, conv_keep_prob)
                    conv1_pool_len = int((101 - L_conv1 + 1) / maxpool_len1)

                    n_conv2 = n_conv1
                    L_conv2 = 5
                    maxpool_len2 = int(
                        conv1_pool_len - L_conv2 + 1
                    )  # global maxpooling (max-pool across temporal domain)
                    conv2 = convolution2d(conv1_pool,
                                          n_conv2, [L_conv2, 1],
                                          padding='VALID',
                                          normalizer_fn=None)
                    conv2_pool = max_pool2d(conv2, [maxpool_len2, 1],
                                            [maxpool_len2, 1])
                    #conv2_drop = tf.nn.dropout(conv2_pool, conv_keep_prob)

                    # LINEAR FC LAYER
                    y_conv = fully_connected(flatten(conv2_pool),
                                             2,
                                             activation_fn=None)
                    y_conv_softmax = tf.nn.softmax(y_conv)

                    cross_entropy = tf.reduce_mean(
                        tf.nn.softmax_cross_entropy_with_logits(logits=y_conv, labels=y_))
                    train_step = tf.train.AdamOptimizer().minimize(
                        cross_entropy)
                    correct_prediction = tf.equal(tf.argmax(y_conv, 1),
                                                  tf.argmax(y_, 1))
                    accuracy = tf.reduce_mean(
                        tf.cast(correct_prediction, tf.float32))
                    sess.run(tf.initialize_all_variables())

                    i = 0
                    prev_auc = 0.0001  # small value to prevent DIV0
                    stop_condition = None
                    t0 = time.time()
                    while stop_condition is None:
                        #if i%100 == 0:
                        if 1 == 0:  # turned off
                            #t0 = time.time()
                            pred_validation_labels = None
                            true_validation_labels = None
                            prev_validation_epochs_completed = _validation_epochs_completed
                            while _validation_epochs_completed - prev_validation_epochs_completed == 0:  # do in mini batches because single GTX970 has insufficient memory to test all at once
                                if _validation_size > 1024 * 5:
                                    validation_batch = get_next_batch(1, 1024)
                                else:
                                    validation_batch = get_next_batch(1, 64)
                                if pred_validation_labels is None:
                                    pred_validation_labels = y_conv_softmax.eval(
                                        feed_dict={
                                            x: validation_batch[0],
                                            y_: validation_batch[1]
                                        })
                                    true_validation_labels = validation_batch[
                                        1]
                                else:
                                    pred_validation_labels = numpy.vstack([
                                        pred_validation_labels,
                                        y_conv_softmax.eval(
                                            feed_dict={
                                                x: validation_batch[0],
                                                y_: validation_batch[1]
                                            })
                                    ])
                                    true_validation_labels = numpy.vstack([
                                        true_validation_labels,
                                        validation_batch[1]
                                    ])
                            fpr, tpr, _ = roc_curve(
                                true_validation_labels[:, 0],
                                pred_validation_labels[:, 0])
                            roc_auc = auc(fpr, tpr)
                            #check stop condition:
                            perc_chg_auc = (roc_auc - prev_auc) / prev_auc
                            #if perc_chg_auc < 0.005: # stop when auc moving average on validation set changes by <0.5%
                            #    stop_condition = 1
                            prev_auc = roc_auc
                            print(
                                "%s, dataset %g, epoch %d, step %d, time elapsed %g, validation roc auc %g, perc chg in auc %g"
                                % (_datasets[dataset_num],
                                   dataset_num, _train_epochs_completed, i,
                                   time.time() - t0, roc_auc, perc_chg_auc))
                            t0 = time.time()
                        batch = get_next_batch(0)
                        train_step.run(feed_dict={
                            x: batch[0],
                            y_: batch[1],
                            dropout_on: 1
                        })
                        if i == 7000:
                            stop_condition = 1
                        i += 1

                    pred_test_labels = None
                    true_test_labels = None
                    while _test_epochs_completed == 0:  # do testing in mini batches because single GTX970 has insufficient memory to test all at once
                        test_batch = get_next_batch(2, 64)
                        if pred_test_labels is None:
                            pred_test_labels = y_conv_softmax.eval(
                                feed_dict={
                                    x: test_batch[0],
                                    y_: test_batch[1]
                                })
                            true_test_labels = test_batch[1]
                        else:
                            pred_test_labels = numpy.vstack([
                                pred_test_labels,
                                y_conv_softmax.eval(feed_dict={
                                    x: test_batch[0],
                                    y_: test_batch[1]
                                })
                            ])
                            true_test_labels = numpy.vstack(
                                [true_test_labels, test_batch[1]])
                    fpr, tpr, _ = roc_curve(
                        true_test_labels[:, 0], pred_test_labels[:, 0]
                    )  # get receiver operating characteristics
                    precision, recall, _ = precision_recall_curve(
                        true_test_labels[:, 0],
                        pred_test_labels[:, 0])  # get precision recall curve
                    roc_auc = auc(fpr, tpr)
                    prc_auc = auc(recall, precision)
                    print(
                        "%s, dataset %g, final test roc auc %g, final test prc auc %g, time elapsed %g seconds"
                        % (_datasets[dataset_num], dataset_num, roc_auc,
                           prc_auc, time.time() - t0))
                    f = open(file_name, 'a')
                    f.write(
                        str(dataset_num) + ',' + str(motif_occ) + ',' +
                        _datasets[dataset_num] + ',' + str(roc_auc) + ',' +
                        str(prc_auc) + ',' + str(time.time() - t0) + '\n')
                    f.close()
                    t0 = time.time()
Example #26
def main(_):
    global _train_epochs_completed
    global _validation_epochs_completed
    global _test_epochs_completed
    global _datasets
    global _validation_size
    global _test_labels

    dropout_on = tf.placeholder(tf.float32)
    if dropout_on is not None:
        rnn_keep_prob = 0.5
    else:
        rnn_keep_prob = 1.0

    file_name = 'out_' + str(int(time.time())) + '.csv'
    f = open(file_name, 'w')  # clear file
    f.write('dataset_num,dataset_name,roc_auc\n')
    f.close()
    for dataset_num in range(0, len(_datasets)):
        load_ENCODE_k562_dataset(dataset_num)

        # LSTM Network Parameters ============================
        n_hidden = 32  # hidden layer num of features
        # ====================================================
        n_input = 4  # data input (4 possible dna bases)
        n_steps = 101  # timesteps (101 dna bases)
        n_classes = 2  # total classes (binary classification for binding/nonbinding)

        x = tf.placeholder(tf.float32, shape=[None, n_steps * n_input])
        y_ = tf.placeholder(tf.float32, shape=[None, 2])

        # Create the model
        x_image = tf.reshape(x, [-1, n_steps, n_input])
        birnn_out = BiRNN(x_image, n_input, n_steps, n_hidden)

        # Linear activation
        # rnn_out = birnn_out[-1] # ...using LAST of the rnn inner loops (as in the MNIST example)
        rnn_out = tf.div(tf.add_n(birnn_out),
                         n_steps)  # ...using AVERAGE of the rnn inner loops
        rnn_out_drop = tf.nn.dropout(
            rnn_out, rnn_keep_prob)  # apply dropout to regularize the LSTM
        pred = fully_connected(flatten(rnn_out_drop), 2, activation_fn=None)
        pred_softmax = tf.nn.softmax(pred)

        cross_entropy = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y_))
        train_step = tf.train.AdamOptimizer().minimize(cross_entropy)
        sess.run(tf.initialize_all_variables())

        i = 0
        prev_auc = 0.0001  # small value to prevent DIV0
        stop_condition = None
        t0 = time.time()
        while stop_condition is None:
            if i % 1000 == 0:
                #t0 = time.time()
                pred_validation_labels = None
                true_validation_labels = None
                prev_validation_epochs_completed = _validation_epochs_completed
                while _validation_epochs_completed - prev_validation_epochs_completed == 0:  # do in mini batches because single GTX970 has insufficient memory to test all at once
                    if _validation_size > 1024 * 5:
                        validation_batch = get_next_batch(1, 1024)
                    else:
                        validation_batch = get_next_batch(1, 64)
                    if pred_validation_labels is None:
                        pred_validation_labels = pred_softmax.eval(
                            feed_dict={
                                x: validation_batch[0],
                                y_: validation_batch[1]
                            })
                        true_validation_labels = validation_batch[1]
                    else:
                        pred_validation_labels = numpy.vstack([
                            pred_validation_labels,
                            pred_softmax.eval(feed_dict={
                                x: validation_batch[0],
                                y_: validation_batch[1]
                            })
                        ])
                        true_validation_labels = numpy.vstack(
                            [true_validation_labels, validation_batch[1]])
                fpr, tpr, _ = roc_curve(true_validation_labels[:, 0],
                                        pred_validation_labels[:, 0])
                roc_auc = auc(fpr, tpr)
                #check stop condition:
                perc_chg_auc = (roc_auc - prev_auc) / prev_auc
                #if perc_chg_auc < 0.005: # stop when auc moving average on validation set changes by <0.5%
                #    stop_condition = 1
                prev_auc = roc_auc
                print(
                    "%s, dataset %g, epoch %d, step %d, time elapsed %g, validation roc auc %g, perc chg in auc %g"
                    % (_datasets[dataset_num], dataset_num,
                       _train_epochs_completed, i, time.time() - t0, roc_auc,
                       perc_chg_auc))
                t0 = time.time()
            batch = get_next_batch(0)
            train_step.run(feed_dict={
                x: batch[0],
                y_: batch[1],
                dropout_on: 1
            })
            if i == 7000:
                stop_condition = 1
            i += 1

        pred_test_labels = None
        true_test_labels = None
        while _test_epochs_completed == 0:  # do testing in mini batches because single GTX970 has insufficient memory to test all at once
            test_batch = get_next_batch(2, 64)
            if pred_test_labels is None:
                pred_test_labels = pred_softmax.eval(feed_dict={
                    x: test_batch[0],
                    y_: test_batch[1]
                })
                true_test_labels = test_batch[1]
            else:
                pred_test_labels = numpy.vstack([
                    pred_test_labels,
                    pred_softmax.eval(feed_dict={
                        x: test_batch[0],
                        y_: test_batch[1]
                    })
                ])
                true_test_labels = numpy.vstack(
                    [true_test_labels, test_batch[1]])
        fpr, tpr, _ = roc_curve(true_test_labels[:, 0], pred_test_labels[:, 0])
        roc_auc = auc(fpr, tpr)
        print("%s, dataset %g, final test roc auc %g" %
              (_datasets[dataset_num], dataset_num, roc_auc))
        f = open(file_name, 'a')
        f.write(
            str(dataset_num) + ',' + _datasets[dataset_num] + ',' +
            str(roc_auc) + '\n')
        f.close()