def run_inception(images,
                  graph_def=None,
                  default_graph_def_fn=_default_graph_def_fn,
                  image_size=INCEPTION_DEFAULT_IMAGE_SIZE,
                  input_tensor=INCEPTION_INPUT,
                  output_tensor=INCEPTION_OUTPUT):
    """Feed images to a pretrained Inception classifier and return activations.

    Args:
        images: Input tensors shaped [batch, height, width, channels]. Values
            are expected to lie in [-1, 1], which `preprocess_image` can
            produce.
        graph_def: GraphDef proto of a pretrained Inception graph. When `None`,
            `default_graph_def_fn` is called to obtain one.
        default_graph_def_fn: Zero-argument callable returning a GraphDef. Only
            consulted when `graph_def` is `None`.
        image_size: Required image width and height; forwarded to
            `_validate_images`.
        input_tensor: Name of the input Tensor.
        output_tensor: Name, or list of names, of the output Tensor(s) whose
            activations should be computed.

    Returns:
        The Tensor (or the list of Tensors) produced by `run_image_classifier`
        for `output_tensor`, each passed through `layers.flatten` when the
        rank check below says it is not 2D.

    Raises:
        ValueError: If both `graph_def` and `default_graph_def_fn` are `None`.
        ValueError: Per the documented contract, if the images are not the
            correct size (expected to come from `_validate_images`).
    """

    def _as_matrix(activation):
        # NOTE(review): if `array_ops.rank` returns a Tensor here, `!= 2` may
        # not be a static rank check -- confirm the comparison is intended.
        if array_ops.rank(activation) != 2:
            return layers.flatten(activation)
        return activation

    images = _validate_images(images, image_size)

    if graph_def is None and default_graph_def_fn is None:
        raise ValueError('If `graph_def` is `None`, must provide '
                         '`default_graph_def_fn`.')
    if graph_def is None:
        graph_def = default_graph_def_fn()

    activations = run_image_classifier(images, graph_def, input_tensor,
                                       output_tensor)

    if not isinstance(activations, list):
        return _as_matrix(activations)

    # Update the list in place so the same list object is returned.
    activations[:] = [_as_matrix(activation) for activation in activations]
    return activations
def condition_tensor(tensor, conditioning):
    """Add a learned linear projection of `conditioning` to `tensor`.

    Conditioning scheme based on https://arxiv.org/abs/1609.03499.

    Args:
        tensor: A minibatch tensor to be conditioned. Its non-batch dimensions
            must be fully defined.
        conditioning: A minibatch Tensor to condition on. Must be at least 2D;
            it is flattened before the linear projection.

    Returns:
        `tensor` plus the projected conditioning, the projection being
        reshaped to `_get_shape(tensor)` when its shape is not already
        compatible with `tensor.shape`.

    Raises:
        ValueError: If `conditioning` has fewer than 2 dimensions.
        ValueError: Per the documented contract, if the non-batch dimensions
            of `tensor` are not fully defined or the batch dimensions of the
            inputs do not match.
    """
    feature_shape = tensor.shape[1:]
    feature_shape.assert_is_fully_defined()
    num_features = feature_shape.num_elements()

    if conditioning.shape.ndims < 2:
        raise ValueError('conditioning must be at least 2D, but saw shape: %s'
                         % conditioning.shape)

    # One output unit per element of a single example of `tensor`.
    projected = layers.linear(layers.flatten(conditioning), num_features)
    if projected.shape.is_compatible_with(tensor.shape):
        return tensor + projected
    return tensor + array_ops.reshape(projected, _get_shape(tensor))
def condition_tensor(tensor, conditioning):
    """Condition `tensor` by adding a linear mapping of `conditioning`.

    Conditioning scheme based on https://arxiv.org/abs/1609.03499.

    Args:
        tensor: A minibatch tensor to be conditioned. All dimensions after the
            first must be fully defined.
        conditioning: A minibatch Tensor to condition on, with at least two
            dimensions. It is flattened before being mapped.

    Returns:
        The sum of `tensor` and the mapped conditioning. The mapped value is
        reshaped to `_get_shape(tensor)` first if its shape is not compatible
        with `tensor.shape`.

    Raises:
        ValueError: If `conditioning` has fewer than 2 dimensions.
        ValueError: Per the documented contract, if the non-batch dimensions
            of `tensor` are not fully defined or the batch dimensions of the
            inputs do not match.
    """
    non_batch_shape = tensor.shape[1:]
    non_batch_shape.assert_is_fully_defined()
    num_features = non_batch_shape.num_elements()

    if conditioning.shape.ndims < 2:
        raise ValueError('conditioning must be at least 2D, but saw shape: %s'
                         % conditioning.shape)

    flat_conditioning = layers.flatten(conditioning)
    mapped = layers.linear(flat_conditioning, num_features)

    needs_reshape = not mapped.shape.is_compatible_with(tensor.shape)
    if needs_reshape:
        mapped = array_ops.reshape(mapped, _get_shape(tensor))
    return tensor + mapped
def _embedding_alexnet(is_training, images, params):
    """Build an AlexNet-style embedding network under the 'Siamese' scope.

    The stack is conv1 -> pool1 -> conv2 -> pool2 -> dropout -> conv3 ->
    conv4 -> pool5 -> dropout -> flatten -> fc1 -> dropout -> fc2. Variables
    are created with `reuse=tf.AUTO_REUSE`. Batch normalization after conv1
    and conv2 is currently disabled.

    Args:
        is_training: Forwarded to every dropout layer.
        images: Input passed to the first convolution.
        params: Object whose `embedding_size` attribute sets the width of the
            final fully connected layer.

    Returns:
        The output of the 'fc2' fully connected layer.
    """

    def _drop(inputs, keep_prob):
        return layers_lib.dropout(
            inputs, keep_prob=keep_prob, is_training=is_training)

    def _dense(inputs, units, scope):
        # Both fully connected layers share the same L2 weight penalty.
        return layers_lib.fully_connected(
            inputs,
            units,
            scope=scope,
            weights_regularizer=layers.l2_regularizer(0.0005))

    pool_kernel = [3, 3]
    pool_stride = 2

    with tf.variable_scope('Siamese', 'CFCASiamese', [images],
                           reuse=tf.AUTO_REUSE), \
            arg_scope([layers.conv2d], activation_fn=tf.nn.relu):
        net = layers.conv2d(
            images, 96, [11, 11], 4, padding='VALID', scope='conv1')
        net = layers_lib.max_pool2d(
            net, pool_kernel, pool_stride, scope='pool1')

        net = layers.conv2d(net, 256, [5, 5], scope='conv2')
        net = layers_lib.max_pool2d(
            net, pool_kernel, pool_stride, scope='pool2')
        net = _drop(net, 0.7)

        net = layers.conv2d(net, 384, [3, 3], scope='conv3')
        net = layers.conv2d(net, 256, [3, 3], scope='conv4')
        net = layers_lib.max_pool2d(
            net, pool_kernel, pool_stride, scope='pool5')
        net = _drop(net, 0.7)

        net = layers_lib.flatten(net, scope='flatten1')
        net = _dense(net, 1024, 'fc1')
        net = _drop(net, 0.5)
        net = _dense(net, params.embedding_size, 'fc2')

    return net
def slim_net_original(image, keep_prob):
    """Build a small conv/pool/fc network ending in a 10-unit layer.

    Layer order: conv1 (32 filters, 5x5, L1 weight regularizer) -> pool1 ->
    conv2 (64 filters, 5x5, L2 weight regularizer) -> pool2 -> flatten ->
    fc1 (1024 units) -> dropout -> fc2 (10 units). Biases of the conv and
    fully connected layers use a random-normal initializer (stddev 0.1).
    The conv2 output is also handed to `summaries.summarize_tensor`.

    Args:
        image: Input passed to the first convolution.
        keep_prob: Keep probability forwarded to the dropout layer.

    Returns:
        The output of the 'fc2' fully connected layer.
    """
    bias_init = tf.random_normal_initializer(stddev=0.1)
    kernel = [5, 5]

    with arg_scope([layers.conv2d, layers.fully_connected],
                   biases_initializer=bias_init):
        conv1 = layers.conv2d(
            image,
            32,
            kernel,
            scope='conv1',
            weights_regularizer=regularizers.l1_regularizer(0.5))
        pool1 = layers.max_pool2d(conv1, 2, scope='pool1')

        conv2 = layers.conv2d(
            pool1,
            64,
            kernel,
            scope='conv2',
            weights_regularizer=regularizers.l2_regularizer(0.5))
        summaries.summarize_tensor(conv2, tag='conv2')
        pool2 = layers.max_pool2d(conv2, 2, scope='pool2')

        flat = layers.flatten(pool2, scope='flatten1')
        hidden = layers.fully_connected(flat, 1024, scope='fc1')
        dropped = layers.dropout(hidden, keep_prob=keep_prob, scope='dropout1')
        return layers.fully_connected(dropped, 10, scope='fc2')
def main(_):
    """Build a two-stage convolutional classifier graph and initialize it.

    The graph takes flattened 14x4 inputs, applies conv -> max-pool twice
    (the second pool spans every remaining position) and ends in a linear
    2-unit layer trained with softmax cross-entropy and Adam. Only variable
    initialization is run here, through `sess`, which is expected to be
    defined elsewhere in the module.

    Fixes relative to the previous version: `tf.reshape` now receives the
    input tensor, the first pooling layer (`conv1_pool`) is actually built,
    and the cross-entropy op is called with keyword arguments.

    Args:
        _: Unused.

    Raises:
        ValueError: If the configured layer sizes leave no positions for the
            second convolution's global max-pool. This is the case for the
            current "TBD" sizes, which must be tuned before this can run.
    """
    seq_len = 14
    n_bases = 4

    n_conv1 = 384  # TBD
    L_conv1 = 9  # TBD
    maxpool_len1 = 2
    conv1_pool_len = int((seq_len - L_conv1 + 1) / maxpool_len1)

    n_conv2 = n_conv1
    L_conv2 = 5
    # Global max-pooling (max-pool across the temporal domain).
    maxpool_len2 = int(conv1_pool_len - L_conv2 + 1)
    if maxpool_len2 < 1:
        raise ValueError(
            'Second convolution needs at least %d positions but the first '
            'conv/pool stage only yields %d; adjust the layer sizes.' %
            (L_conv2, conv1_pool_len))

    x = tf.placeholder(tf.float32, shape=[None, seq_len * n_bases])
    y_ = tf.placeholder(tf.float32, shape=[None, 2])
    x_image = tf.reshape(x, [-1, seq_len, n_bases, 1])

    conv1 = convolution2d(x_image, n_conv1, [L_conv1, n_bases],
                          padding="VALID", normalizer_fn=None)
    conv1_pool = max_pool2d(conv1, [maxpool_len1, 1], [maxpool_len1, 1])

    conv2 = convolution2d(conv1_pool, n_conv2, [L_conv2, 1],
                          padding='VALID', normalizer_fn=None)
    conv2_pool = max_pool2d(conv2, [maxpool_len2, 1], [maxpool_len2, 1])

    # Linear fully connected output layer.
    y_conv = fully_connected(flatten(conv2_pool), 2, activation_fn=None)
    y_conv_softmax = tf.nn.softmax(y_conv)
    cross_entropy = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=y_conv, labels=y_))
    # Built before initialization so the optimizer's variables are included.
    train_step = tf.train.AdamOptimizer().minimize(cross_entropy)
    correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    sess.run(tf.initialize_all_variables())
def __init__(self, sequence_length, num_classes):
    """Build the placeholders, network, loss and accuracy ops.

    The network reshapes the input to 14x4x1, applies one convolution and
    one max-pool, and ends in a linear layer with `num_classes` outputs
    (previously hard-coded to 2, which could not match `input_y` for any
    other class count).

    Args:
        sequence_length: Width of the flattened input placeholder.
            NOTE(review): the reshape below is fixed to 14x4, so this
            presumably has to be 56 -- confirm against callers.
        num_classes: Number of label columns and of output units.

    Attributes set:
        input_x, input_y, dropout_keep_prob: Input placeholders.
            `dropout_keep_prob` is not read by the graph built here.
        cross_entropy: Mean softmax cross-entropy between logits and labels.
        accuracy: Mean agreement between predicted and labelled argmax.
    """
    # Placeholders for input, output and dropout.
    self.input_x = tf.placeholder(
        tf.float32, [None, sequence_length], name="input_x")
    self.input_y = tf.placeholder(
        tf.float32, [None, num_classes], name="input_y")
    self.dropout_keep_prob = tf.placeholder(
        tf.float32, name="dropout_keep_prob")

    x_image = tf.reshape(self.input_x, shape=[-1, 14, 4, 1])

    n_conv1 = 44
    L_conv1 = 5
    maxpool_len1 = 2
    conv1 = convolution2d(x_image, n_conv1, [L_conv1, 4],
                          padding='VALID', normalizer_fn=None)
    conv1_pool = max_pool2d(conv1, [maxpool_len1, 1], [maxpool_len1, 1])
    # A second conv + global max-pool stage used to follow here; it is
    # disabled, so the classifier reads the first pooled feature map.

    # Linear fully connected output layer.
    y_conv = fully_connected(
        flatten(conv1_pool), num_classes, activation_fn=None)
    prediction = tf.nn.softmax(y_conv)

    self.cross_entropy = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(
            logits=y_conv, labels=self.input_y))

    correct_prediction = tf.equal(
        tf.argmax(prediction, 1), tf.argmax(self.input_y, 1))
    self.accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
def slim_net_original(image, keep_prob):
    """Assemble a two-conv, two-fc network with a 10-unit final layer.

    Layers, in order: 'conv1' (32 filters, 5x5, L1 weight regularizer),
    'pool1', 'conv2' (64 filters, 5x5, L2 weight regularizer), 'pool2',
    'flatten1', 'fc1' (1024 units), 'dropout1', 'fc2' (10 units). Conv and
    fully connected biases are initialized from a random normal with
    stddev 0.1, and the 'conv2' output is passed to
    `summaries.summarize_tensor`.

    Args:
        image: Input passed to the first convolution.
        keep_prob: Keep probability forwarded to the dropout layer.

    Returns:
        The output of the 'fc2' fully connected layer.
    """
    scoped_layers = [layers.conv2d, layers.fully_connected]
    bias_initializer = tf.random_normal_initializer(stddev=0.1)

    with arg_scope(scoped_layers, biases_initializer=bias_initializer):
        l1_penalty = regularizers.l1_regularizer(0.5)
        net = layers.conv2d(
            image, 32, [5, 5], scope='conv1', weights_regularizer=l1_penalty)
        net = layers.max_pool2d(net, 2, scope='pool1')

        l2_penalty = regularizers.l2_regularizer(0.5)
        net = layers.conv2d(
            net, 64, [5, 5], scope='conv2', weights_regularizer=l2_penalty)
        summaries.summarize_tensor(net, tag='conv2')
        net = layers.max_pool2d(net, 2, scope='pool2')

        net = layers.flatten(net, scope='flatten1')
        net = layers.fully_connected(net, 1024, scope='fc1')
        net = layers.dropout(net, keep_prob=keep_prob, scope='dropout1')
        net = layers.fully_connected(net, 10, scope='fc2')

    return net
def build_layer_fn(x, w_initializer, b_initializer):
    """Build a flatten + 3-unit fully connected layer and report its variables.

    The layer's weights and biases are registered in dedicated collections so
    they can be looked up afterwards; exactly one variable is asserted in each.
    `self` is a free name here, presumably the enclosing test case -- confirm
    against the surrounding method.

    Uses `assertEqual` rather than the deprecated `assertEquals` alias, which
    was removed from `unittest.TestCase` in Python 3.12.

    Args:
        x: Input to the layer; flattened before the fully connected layer.
        w_initializer: Initializer for the layer's weights.
        b_initializer: Initializer for the layer's biases.

    Returns:
        A tuple `(net, expected_normalized_vars, expected_not_normalized_vars)`
        where the two dicts map a descriptive key to the weight variable and
        to the bias variable respectively.
    """
    var_collection = {
        'weights': ['CONTRIB_LAYERS_FC_WEIGHTS'],
        'biases': ['CONTRIB_LAYERS_FC_BIASES']
    }
    x = contrib_layers.flatten(x)
    net = contrib_layers.fully_connected(
        x,
        3,
        weights_initializer=w_initializer,
        biases_initializer=b_initializer,
        variables_collections=var_collection)

    weight_vars = ops.get_collection('CONTRIB_LAYERS_FC_WEIGHTS')
    self.assertEqual(1, len(weight_vars))
    bias_vars = ops.get_collection('CONTRIB_LAYERS_FC_BIASES')
    self.assertEqual(1, len(bias_vars))

    expected_normalized_vars = {
        'contrib.layers.fully_connected.weights': weight_vars[0]
    }
    expected_not_normalized_vars = {
        'contrib.layers.fully_connected.bias': bias_vars[0]
    }
    return net, expected_normalized_vars, expected_not_normalized_vars
def main(_):
    """Train and test a single-convolution CNN on every dataset.

    For each entry of the module-level `_datasets` list this loads the data,
    builds a conv -> global max-pool -> linear graph over 101x4 inputs, runs
    7001 Adam steps (reporting validation ROC AUC every 1000 steps) and
    appends the final test ROC AUC to ``out_<unix time>.csv``.

    Relies on names defined elsewhere in the module: `sess`,
    `load_ENCODE_k562_dataset`, `get_next_batch` and the epoch counters.
    `get_next_batch`'s first argument presumably selects the split
    (0 = train, 1 = validation, 2 = test) -- confirm against its definition.

    Changes from the previous version: the cross-entropy op is called with
    keyword arguments, the CSV file is handled with `with`, per-batch
    predictions are stacked once instead of on every batch, and unused
    accuracy ops and the dead keep-prob branch are gone.

    NOTE(review): ops for every dataset are added to the same default graph;
    confirm that this accumulation is intended.

    Args:
        _: Unused on entry; later rebound to the discarded `roc_curve`
            thresholds.
    """
    global _train_epochs_completed
    global _validation_epochs_completed
    global _test_epochs_completed
    global _datasets
    global _validation_size
    global _test_labels

    # Fed during training; the graph built below does not read it.
    dropout_on = tf.placeholder(tf.float32)

    file_name = 'out_' + str(int(time.time())) + '.csv'
    with open(file_name, 'w') as f:  # truncates the file, writes the header
        f.write('dataset_num,dataset_name,roc_auc\n')

    last_step = 7000

    for dataset_num in range(len(_datasets)):
        load_ENCODE_k562_dataset(dataset_num)

        x = tf.placeholder(tf.float32, shape=[None, 101 * 4])
        y_ = tf.placeholder(tf.float32, shape=[None, 2])

        # Create the model.
        x_image = tf.reshape(x, [-1, 101, 4, 1])

        # Convolutional layer with global max-pooling over positions.
        n_conv3 = 64
        L_conv3 = 9
        maxpool_len3 = int(101 - L_conv3 + 1)
        conv3 = convolution2d(x_image, n_conv3, [L_conv3, 4],
                              padding='VALID', normalizer_fn=None)
        conv3_pool = max_pool2d(conv3, [maxpool_len3, 1], [maxpool_len3, 1])

        # Linear fully connected output layer.
        y_conv = fully_connected(flatten(conv3_pool), 2, activation_fn=None)
        y_conv_softmax = tf.nn.softmax(y_conv)
        cross_entropy = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=y_conv, labels=y_))
        train_step = tf.train.AdamOptimizer().minimize(cross_entropy)

        sess.run(tf.initialize_all_variables())

        prev_auc = 0.0001  # small value to prevent division by zero
        t0 = time.time()
        for i in range(last_step + 1):
            if i % 1000 == 0:
                predicted, actual = [], []
                start_epoch = _validation_epochs_completed
                # Evaluate in mini-batches until one validation epoch has
                # completed; the whole set does not fit in GPU memory.
                while _validation_epochs_completed == start_epoch:
                    batch_size = 1024 if _validation_size > 1024 * 5 else 64
                    validation_batch = get_next_batch(1, batch_size)
                    predicted.append(y_conv_softmax.eval(feed_dict={
                        x: validation_batch[0],
                        y_: validation_batch[1]
                    }))
                    actual.append(validation_batch[1])
                pred_validation_labels = numpy.vstack(predicted)
                true_validation_labels = numpy.vstack(actual)

                fpr, tpr, _ = roc_curve(true_validation_labels[:, 0],
                                        pred_validation_labels[:, 0])
                roc_auc = auc(fpr, tpr)
                # Reported only; AUC-based early stopping is disabled.
                perc_chg_auc = (roc_auc - prev_auc) / prev_auc
                prev_auc = roc_auc
                print(
                    "%s, dataset %g, epoch %d, step %d, time elapsed %g, validation roc auc %g, perc chg in auc %g"
                    % (_datasets[dataset_num], dataset_num,
                       _train_epochs_completed, i, time.time() - t0, roc_auc,
                       perc_chg_auc))
                t0 = time.time()

            batch = get_next_batch(0)
            train_step.run(feed_dict={
                x: batch[0],
                y_: batch[1],
                dropout_on: 1
            })

        predicted, actual = [], []
        # Test in mini-batches until one test epoch has completed.
        while _test_epochs_completed == 0:
            test_batch = get_next_batch(2, 64)
            predicted.append(y_conv_softmax.eval(feed_dict={
                x: test_batch[0],
                y_: test_batch[1]
            }))
            actual.append(test_batch[1])
        pred_test_labels = numpy.vstack(predicted)
        true_test_labels = numpy.vstack(actual)

        fpr, tpr, _ = roc_curve(true_test_labels[:, 0], pred_test_labels[:, 0])
        roc_auc = auc(fpr, tpr)
        print("%s, dataset %g, final test roc auc %g" %
              (_datasets[dataset_num], dataset_num, roc_auc))
        with open(file_name, 'a') as f:
            f.write(
                str(dataset_num) + ',' + _datasets[dataset_num] + ',' +
                str(roc_auc) + '\n')
def main(_):
    """Train and test a bidirectional-RNN classifier on every dataset.

    For each entry of the module-level `_datasets` list this loads the data,
    runs `BiRNN` over 101 steps of 4 features, averages its outputs, applies
    dropout and a linear 2-class layer, trains for 7001 Adam steps (reporting
    validation ROC AUC every 1000 steps) and appends the final test ROC AUC
    to ``out_<unix time>.csv``.

    Bug fix: the dropout keep probability used to be the Python constant 0.5,
    so dropout stayed active during validation and testing. It is now a
    placeholder fed with 0.5 for training steps and 1.0 for evaluation.

    Other changes: the cross-entropy op is called with keyword arguments, the
    CSV file is handled with `with`, `n_classes` is actually used, and
    per-batch predictions are stacked once.

    Relies on names defined elsewhere in the module: `sess`, `BiRNN`,
    `load_ENCODE_k562_dataset`, `get_next_batch` and the epoch counters.
    `get_next_batch`'s first argument presumably selects the split
    (0 = train, 1 = validation, 2 = test) -- confirm against its definition.

    Args:
        _: Unused on entry; later rebound to the discarded `roc_curve`
            thresholds.
    """
    global _train_epochs_completed
    global _validation_epochs_completed
    global _test_epochs_completed
    global _datasets
    global _validation_size
    global _test_labels

    # Keep probability for the RNN output dropout; fed on every run.
    rnn_keep_prob = tf.placeholder(tf.float32)
    train_keep_prob = 0.5
    eval_keep_prob = 1.0

    file_name = 'out_' + str(int(time.time())) + '.csv'
    with open(file_name, 'w') as f:  # truncates the file, writes the header
        f.write('dataset_num,dataset_name,roc_auc\n')

    last_step = 7000

    for dataset_num in range(len(_datasets)):
        load_ENCODE_k562_dataset(dataset_num)

        # LSTM network parameters.
        n_hidden = 32  # hidden layer num of features
        n_input = 4  # data input (4 possible dna bases)
        n_steps = 101  # timesteps (101 dna bases)
        n_classes = 2  # binary classification for binding/nonbinding

        x = tf.placeholder(tf.float32, shape=[None, n_steps * n_input])
        y_ = tf.placeholder(tf.float32, shape=[None, n_classes])

        # Create the model.
        x_image = tf.reshape(x, [-1, n_steps, n_input])
        birnn_out = BiRNN(x_image, n_input, n_steps, n_hidden)

        # Use the average of the RNN outputs rather than only the last one.
        rnn_out = tf.div(tf.add_n(birnn_out), n_steps)
        rnn_out_drop = tf.nn.dropout(rnn_out, rnn_keep_prob)

        pred = fully_connected(
            flatten(rnn_out_drop), n_classes, activation_fn=None)
        pred_softmax = tf.nn.softmax(pred)
        cross_entropy = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y_))
        train_step = tf.train.AdamOptimizer().minimize(cross_entropy)

        sess.run(tf.initialize_all_variables())

        prev_auc = 0.0001  # small value to prevent division by zero
        t0 = time.time()
        for i in range(last_step + 1):
            if i % 1000 == 0:
                predicted, actual = [], []
                start_epoch = _validation_epochs_completed
                # Evaluate in mini-batches until one validation epoch has
                # completed; the whole set does not fit in GPU memory.
                while _validation_epochs_completed == start_epoch:
                    batch_size = 1024 if _validation_size > 1024 * 5 else 64
                    validation_batch = get_next_batch(1, batch_size)
                    predicted.append(pred_softmax.eval(feed_dict={
                        x: validation_batch[0],
                        y_: validation_batch[1],
                        rnn_keep_prob: eval_keep_prob
                    }))
                    actual.append(validation_batch[1])
                pred_validation_labels = numpy.vstack(predicted)
                true_validation_labels = numpy.vstack(actual)

                fpr, tpr, _ = roc_curve(true_validation_labels[:, 0],
                                        pred_validation_labels[:, 0])
                roc_auc = auc(fpr, tpr)
                # Reported only; AUC-based early stopping is disabled.
                perc_chg_auc = (roc_auc - prev_auc) / prev_auc
                prev_auc = roc_auc
                print(
                    "%s, dataset %g, epoch %d, step %d, time elapsed %g, validation roc auc %g, perc chg in auc %g"
                    % (_datasets[dataset_num], dataset_num,
                       _train_epochs_completed, i, time.time() - t0, roc_auc,
                       perc_chg_auc))
                t0 = time.time()

            batch = get_next_batch(0)
            train_step.run(feed_dict={
                x: batch[0],
                y_: batch[1],
                rnn_keep_prob: train_keep_prob
            })

        predicted, actual = [], []
        # Test in mini-batches until one test epoch has completed.
        while _test_epochs_completed == 0:
            test_batch = get_next_batch(2, 64)
            predicted.append(pred_softmax.eval(feed_dict={
                x: test_batch[0],
                y_: test_batch[1],
                rnn_keep_prob: eval_keep_prob
            }))
            actual.append(test_batch[1])
        pred_test_labels = numpy.vstack(predicted)
        true_test_labels = numpy.vstack(actual)

        fpr, tpr, _ = roc_curve(true_test_labels[:, 0], pred_test_labels[:, 0])
        roc_auc = auc(fpr, tpr)
        print("%s, dataset %g, final test roc auc %g" %
              (_datasets[dataset_num], dataset_num, roc_auc))
        with open(file_name, 'a') as f:
            f.write(
                str(dataset_num) + ',' + _datasets[dataset_num] + ',' +
                str(roc_auc) + '\n')
def main(_):
    """Train and test a two-convolution CNN per dataset and motif task.

    For every dataset in the module-level `_datasets` list and for both motif
    settings (0 = discovery, 1 = occupancy, per the CSV header) this loads
    the data, builds conv -> pool -> conv -> global pool -> linear inside its
    own variable scope, runs 7001 Adam steps and appends test ROC AUC, PR AUC
    and elapsed seconds to ``out_<unix time>.csv``. Datasets that fail to
    load are reported and skipped.

    Changes from the previous version: the bare `except:` is narrowed to
    `Exception` so Ctrl-C and interpreter exit are no longer swallowed, the
    cross-entropy op is called with keyword arguments, the CSV file is
    handled with `with`, the elapsed time is measured once so the printed and
    logged values agree, and unused locals are removed.

    Relies on names defined elsewhere in the module: `sess`,
    `load_ENCODE_k562_dataset`, `get_next_batch` and the epoch counters.
    `get_next_batch`'s first argument presumably selects the split
    (0 = train, 1 = validation, 2 = test) -- confirm against its definition.

    Args:
        _: Unused on entry; later rebound to discarded curve thresholds.
    """
    global _train_epochs_completed
    global _validation_epochs_completed
    global _test_epochs_completed
    global _datasets
    global _validation_size
    global _test_labels

    # Fed during training; the graph built below does not read it.
    dropout_on = tf.placeholder(tf.float32)

    file_name = 'out_' + str(int(time.time())) + '.csv'
    with open(file_name, 'w') as f:  # truncates the file, writes the header
        f.write(
            'dataset_num,motif_discovery=0|motif_occupancy=1,dataset_name,roc_auc,prc_auc,time(sec)\n'
        )

    last_step = 7000
    # Periodic validation is turned off; when enabled it ran every 100 steps.
    validation_enabled = False

    for dataset_num in range(len(_datasets)):
        for motif_occ in range(2):
            try:
                load_ENCODE_k562_dataset(dataset_num, motif_occ)
            except Exception:
                print('Hmm.. Something happened. Skipping dataset ' +
                      _datasets[dataset_num])
                continue

            with tf.variable_scope('scopename_' + str(dataset_num) + '_' +
                                   str(motif_occ)):
                x = tf.placeholder(tf.float32, shape=[None, 101 * 4])
                y_ = tf.placeholder(tf.float32, shape=[None, 2])

                # Create the model.
                x_image = tf.reshape(x, [-1, 101, 4, 1])

                # First convolution + max-pool.
                n_conv1 = 384
                L_conv1 = 9
                maxpool_len1 = 2
                conv1 = convolution2d(x_image, n_conv1, [L_conv1, 4],
                                      padding='VALID', normalizer_fn=None)
                conv1_pool = max_pool2d(conv1, [maxpool_len1, 1],
                                        [maxpool_len1, 1])
                conv1_pool_len = int((101 - L_conv1 + 1) / maxpool_len1)

                # Second convolution + global max-pool over positions.
                n_conv2 = n_conv1
                L_conv2 = 5
                maxpool_len2 = int(conv1_pool_len - L_conv2 + 1)
                conv2 = convolution2d(conv1_pool, n_conv2, [L_conv2, 1],
                                      padding='VALID', normalizer_fn=None)
                conv2_pool = max_pool2d(conv2, [maxpool_len2, 1],
                                        [maxpool_len2, 1])

                # Linear fully connected output layer.
                y_conv = fully_connected(flatten(conv2_pool), 2,
                                         activation_fn=None)
                y_conv_softmax = tf.nn.softmax(y_conv)
                cross_entropy = tf.reduce_mean(
                    tf.nn.softmax_cross_entropy_with_logits(
                        logits=y_conv, labels=y_))
                train_step = tf.train.AdamOptimizer().minimize(cross_entropy)

                sess.run(tf.initialize_all_variables())

                prev_auc = 0.0001  # small value to prevent division by zero
                t0 = time.time()
                for i in range(last_step + 1):
                    if validation_enabled and i % 100 == 0:
                        predicted, actual = [], []
                        start_epoch = _validation_epochs_completed
                        # Mini-batches: the whole set does not fit in GPU
                        # memory.
                        while _validation_epochs_completed == start_epoch:
                            batch_size = (1024 if _validation_size > 1024 * 5
                                          else 64)
                            validation_batch = get_next_batch(1, batch_size)
                            predicted.append(y_conv_softmax.eval(feed_dict={
                                x: validation_batch[0],
                                y_: validation_batch[1]
                            }))
                            actual.append(validation_batch[1])
                        pred_validation_labels = numpy.vstack(predicted)
                        true_validation_labels = numpy.vstack(actual)

                        fpr, tpr, _ = roc_curve(true_validation_labels[:, 0],
                                                pred_validation_labels[:, 0])
                        roc_auc = auc(fpr, tpr)
                        # Reported only; AUC-based early stopping is disabled.
                        perc_chg_auc = (roc_auc - prev_auc) / prev_auc
                        prev_auc = roc_auc
                        print(
                            "%s, dataset %g, epoch %d, step %d, time elapsed %g, validation roc auc %g, perc chg in auc %g"
                            % (_datasets[dataset_num], dataset_num,
                               _train_epochs_completed, i, time.time() - t0,
                               roc_auc, perc_chg_auc))
                        t0 = time.time()

                    batch = get_next_batch(0)
                    train_step.run(feed_dict={
                        x: batch[0],
                        y_: batch[1],
                        dropout_on: 1
                    })

                predicted, actual = [], []
                # Test in mini-batches until one test epoch has completed.
                while _test_epochs_completed == 0:
                    test_batch = get_next_batch(2, 64)
                    predicted.append(y_conv_softmax.eval(feed_dict={
                        x: test_batch[0],
                        y_: test_batch[1]
                    }))
                    actual.append(test_batch[1])
                pred_test_labels = numpy.vstack(predicted)
                true_test_labels = numpy.vstack(actual)

                # Receiver operating characteristic and precision-recall.
                fpr, tpr, _ = roc_curve(true_test_labels[:, 0],
                                        pred_test_labels[:, 0])
                precision, recall, _ = precision_recall_curve(
                    true_test_labels[:, 0], pred_test_labels[:, 0])
                roc_auc = auc(fpr, tpr)
                prc_auc = auc(recall, precision)

                elapsed = time.time() - t0
                print(
                    "%s, dataset %g, final test roc auc %g, final test prc auc %g, time elapsed %g seconds"
                    % (_datasets[dataset_num], dataset_num, roc_auc, prc_auc,
                       elapsed))
                with open(file_name, 'a') as f:
                    f.write(
                        str(dataset_num) + ',' + str(motif_occ) + ',' +
                        _datasets[dataset_num] + ',' + str(roc_auc) + ',' +
                        str(prc_auc) + ',' + str(elapsed) + '\n')
def main(_):
    """Train and test a conv + bidirectional-LSTM model per dataset and task.

    For datasets from index 106 onward in the module-level `_datasets` list
    and for both motif settings (0 = discovery, 1 = occupancy, per the CSV
    header) this loads the data, builds conv -> BiLSTM -> average -> dropout
    -> linear inside its own variable scope, runs 4801 Adam steps and appends
    test ROC AUC, PR AUC and elapsed seconds to ``out_<unix time>.csv``.
    Datasets that fail to load are reported and skipped.

    Bug fix: the "average" of the RNN outputs divided the sum by 101, but the
    convolution leaves `n_steps1` (101 - 9 + 1 = 93) steps, so the sum is now
    divided by `n_steps1`.

    Other changes: the bare `except:` is narrowed to `Exception`, the
    cross-entropy op is called with keyword arguments, the CSV file is
    handled with `with`, and the elapsed time is measured once so the printed
    and logged values agree.

    NOTE(review): the dataset loop starts at 106 -- presumably left over from
    resuming an interrupted run; confirm before running on all datasets.

    Relies on names defined elsewhere in the module: `sess`, `rnn_cell`,
    `load_ENCODE_k562_dataset`, `get_next_batch` and the epoch counters.
    `get_next_batch`'s first argument presumably selects the split
    (0 = train, 1 = validation, 2 = test) -- confirm against its definition.

    Args:
        _: Unused on entry; later rebound to discarded values.
    """
    global _train_epochs_completed
    global _validation_epochs_completed
    global _test_epochs_completed
    global _datasets
    global _validation_size
    global _test_labels

    # Fed during training; the graph built below does not read it.
    dropout_on = tf.placeholder(tf.float32)
    rnn_keep_prob = 1.0  # dropout on the RNN output is effectively disabled

    file_name = 'out_' + str(int(time.time())) + '.csv'
    with open(file_name, 'w') as f:  # truncates the file, writes the header
        f.write('dataset_num,motif_discovery=0|motif_occupancy=1,dataset_name,roc_auc,prc_auc,time(sec)\n')

    last_step = 4800
    # Periodic validation is turned off; when enabled it ran every 100 steps.
    validation_enabled = False

    for dataset_num in range(106, len(_datasets)):
        for motif_occ in range(2):
            try:
                load_ENCODE_k562_dataset(dataset_num, motif_occ)
            except Exception:
                print('Hmm.. Something happened. Skipping dataset ' +
                      _datasets[dataset_num])
                continue

            with tf.variable_scope('scopename_' + str(dataset_num) + '_' +
                                   str(motif_occ)):
                lstm_n_hidden = 32  # hidden layer num features

                x = tf.placeholder(tf.float32, shape=[None, 101 * 4])
                y_ = tf.placeholder(tf.float32, shape=[None, 2])

                # Create the model.
                x_image = tf.reshape(x, [-1, 101, 4, 1])

                # Convolutional layer.
                n_conv1 = 128
                L_conv1 = 9
                n_steps1 = (101 - L_conv1 + 1)
                conv1 = convolution2d(x_image, n_conv1, [L_conv1, 4],
                                      padding='VALID', normalizer_fn=None)
                conv1_resh = tf.reshape(conv1, [-1, n_steps1, n_conv1])

                # Bidirectional LSTM over the per-step convolution outputs.
                conv1_unpacked = tf.unpack(conv1_resh, axis=1)
                lstm_fw_cell = rnn_cell.BasicLSTMCell(lstm_n_hidden)
                lstm_bw_cell = rnn_cell.BasicLSTMCell(lstm_n_hidden)
                birnn_out, _, _ = tf.nn.bidirectional_rnn(
                    lstm_fw_cell, lstm_bw_cell, conv1_unpacked,
                    dtype=tf.float32)

                # Average of the per-step RNN outputs (not just the last one).
                rnn_out = tf.div(tf.add_n(birnn_out), n_steps1)
                rnn_out_drop = tf.nn.dropout(rnn_out, rnn_keep_prob)

                pred = fully_connected(flatten(rnn_out_drop), 2,
                                       activation_fn=None)
                pred_softmax = tf.nn.softmax(pred)
                cross_entropy = tf.reduce_mean(
                    tf.nn.softmax_cross_entropy_with_logits(
                        logits=pred, labels=y_))
                train_step = tf.train.AdamOptimizer().minimize(cross_entropy)

                sess.run(tf.initialize_all_variables())

                prev_auc = 0.0001  # small value to prevent division by zero
                t0 = time.time()
                for i in range(last_step + 1):
                    if validation_enabled and i % 100 == 0:
                        predicted, actual = [], []
                        start_epoch = _validation_epochs_completed
                        # Mini-batches: the whole set does not fit in GPU
                        # memory.
                        while _validation_epochs_completed == start_epoch:
                            batch_size = (1024 if _validation_size > 1024 * 5
                                          else 64)
                            validation_batch = get_next_batch(1, batch_size)
                            predicted.append(pred_softmax.eval(feed_dict={
                                x: validation_batch[0],
                                y_: validation_batch[1]
                            }))
                            actual.append(validation_batch[1])
                        pred_validation_labels = numpy.vstack(predicted)
                        true_validation_labels = numpy.vstack(actual)

                        fpr, tpr, _ = roc_curve(true_validation_labels[:, 0],
                                                pred_validation_labels[:, 0])
                        roc_auc = auc(fpr, tpr)
                        # Reported only; AUC-based early stopping is disabled.
                        perc_chg_auc = (roc_auc - prev_auc) / prev_auc
                        prev_auc = roc_auc
                        print("%s, dataset %g, epoch %d, step %d, time elapsed %g, validation roc auc %g, perc chg in auc %g"%(_datasets[dataset_num], dataset_num, _train_epochs_completed, i, time.time()-t0, roc_auc, perc_chg_auc))
                        t0 = time.time()

                    batch = get_next_batch(0)
                    train_step.run(feed_dict={
                        x: batch[0],
                        y_: batch[1],
                        dropout_on: 1
                    })

                predicted, actual = [], []
                # Test in mini-batches until one test epoch has completed.
                while _test_epochs_completed == 0:
                    test_batch = get_next_batch(2, 64)
                    predicted.append(pred_softmax.eval(feed_dict={
                        x: test_batch[0],
                        y_: test_batch[1]
                    }))
                    actual.append(test_batch[1])
                pred_test_labels = numpy.vstack(predicted)
                true_test_labels = numpy.vstack(actual)

                # Receiver operating characteristic and precision-recall.
                fpr, tpr, _ = roc_curve(true_test_labels[:, 0],
                                        pred_test_labels[:, 0])
                precision, recall, _ = precision_recall_curve(
                    true_test_labels[:, 0], pred_test_labels[:, 0])
                roc_auc = auc(fpr, tpr)
                prc_auc = auc(recall, precision)

                elapsed = time.time() - t0
                print("%s, dataset %g, final test roc auc %g, final test prc auc %g, time elapsed %g seconds"%(_datasets[dataset_num], dataset_num, roc_auc, prc_auc, elapsed))
                with open(file_name, 'a') as f:
                    f.write(str(dataset_num) + ',' + str(motif_occ) + ',' +
                            _datasets[dataset_num] + ',' + str(roc_auc) +
                            ',' + str(prc_auc) + ',' + str(elapsed) + '\n')
def main(_):
    """Train and test a two-convolution CNN with a dropout schedule.

    After filtering `_datasets` through
    `utils.remove_non_existing_datafiles`, each dataset is loaded and a
    conv -> pool -> dropout -> conv -> global pool -> dropout -> linear graph
    is built. Training validates every 1000 steps. At steps 3000, 10000 and
    20000 the dropout keep probability is raised by 0.2 (capped at 0.75), or,
    once it has reached the cap, training is flagged to stop after the
    current step. The final test ROC AUC is appended to
    ``out_<unix time>.csv``.

    Changes from the previous version: the CSV file is handled with `with`,
    per-batch predictions are stacked once instead of on every batch, and
    unused locals (accuracy ops, `prev_train_epochs_compl`) are removed.

    Relies on names defined elsewhere in the module: `sess`, `utils`,
    `load_ENCODE_k562_dataset`, `get_next_batch` and the epoch counters.
    `get_next_batch`'s first argument presumably selects the split
    (0 = train, 1 = validation, 2 = test) -- confirm against its definition.

    Args:
        _: Unused on entry; later rebound to the discarded `roc_curve`
            thresholds.
    """
    global _train_epochs_completed
    global _validation_epochs_completed
    global _test_epochs_completed
    global _datasets
    global _validation_size
    global _test_labels

    file_name = 'out_' + str(int(time.time())) + '.csv'
    with open(file_name, 'w') as f:  # truncates the file, writes the header
        f.write('dataset_num,dataset_name,roc_auc\n')

    _datasets = utils.remove_non_existing_datafiles(_datasets)

    # Steps (not epochs) at which keep_prob is raised or training stops.
    schedule_steps = (3000, 10000, 20000)

    for dataset_num in range(len(_datasets)):
        load_ENCODE_k562_dataset(dataset_num)

        x = tf.placeholder(tf.float32, shape=[None, 101 * 4])
        y_ = tf.placeholder(tf.float32, shape=[None, 2])
        conv_keep_prob = tf.placeholder(tf.float32)

        # Create the model.
        x_image = tf.reshape(x, [-1, 101, 4, 1])

        # First convolution + max-pool + dropout.
        n_conv1 = 64
        L_conv1 = 9
        maxpool_len1 = 2
        conv1 = convolution2d(x_image, n_conv1, [L_conv1, 4],
                              padding='VALID', normalizer_fn=None)
        conv1_pool = max_pool2d(conv1, [maxpool_len1, 1], [maxpool_len1, 1])
        conv1_drop = tf.nn.dropout(conv1_pool, conv_keep_prob)
        conv1_pool_len = int((101 - L_conv1 + 1) / maxpool_len1)

        # Second convolution + global max-pool over positions + dropout.
        n_conv2 = n_conv1
        L_conv2 = 5
        maxpool_len2 = int(conv1_pool_len - L_conv2 + 1)
        conv2 = convolution2d(conv1_drop, n_conv2, [L_conv2, 1],
                              padding='VALID', normalizer_fn=None)
        conv2_pool = max_pool2d(conv2, [maxpool_len2, 1], [maxpool_len2, 1])
        conv2_drop = tf.nn.dropout(conv2_pool, conv_keep_prob)

        # Linear fully connected output layer.
        y_conv = fully_connected(flatten(conv2_drop), 2, activation_fn=None)
        y_conv_softmax = tf.nn.softmax(y_conv)
        cross_entropy = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=y_conv, labels=y_))
        train_step = tf.train.AdamOptimizer().minimize(cross_entropy)

        sess.run(tf.initialize_all_variables())

        i = 0
        prev_auc = 0.0001  # small value to prevent division by zero
        stop_condition = None
        t0 = time.time()
        this_conv_keep_prob = 0.5
        final_keep_prob = 0.75
        while stop_condition is None:
            if i % 1000 == 0:
                predicted, actual = [], []
                start_epoch = _validation_epochs_completed
                # Evaluate in mini-batches until one validation epoch has
                # completed; the whole set does not fit in GPU memory.
                while _validation_epochs_completed == start_epoch:
                    batch_size = 1024 if _validation_size > 1024 * 5 else 64
                    validation_batch = get_next_batch(1, batch_size)
                    predicted.append(y_conv_softmax.eval(feed_dict={
                        x: validation_batch[0],
                        y_: validation_batch[1],
                        conv_keep_prob: 1.0
                    }))
                    actual.append(validation_batch[1])
                pred_validation_labels = numpy.vstack(predicted)
                true_validation_labels = numpy.vstack(actual)

                fpr, tpr, _ = roc_curve(true_validation_labels[:, 0],
                                        pred_validation_labels[:, 0])
                roc_auc = auc(fpr, tpr)
                perc_chg_auc = (roc_auc - prev_auc) / prev_auc
                print(
                    "%s, dataset %g, epoch %d, step %d, time elapsed %g, validation roc auc %g, perc chg in auc %g, conv_keep_prob %g"
                    % (_datasets[dataset_num], dataset_num,
                       _train_epochs_completed, i, time.time() - t0, roc_auc,
                       perc_chg_auc, this_conv_keep_prob))

                # Raise keep_prob until it reaches the cap, then stop.
                if i in schedule_steps:
                    if this_conv_keep_prob < final_keep_prob:
                        this_conv_keep_prob = min(this_conv_keep_prob + 0.2,
                                                  final_keep_prob)
                    else:
                        stop_condition = 1

                prev_auc = roc_auc
                t0 = time.time()

            batch = get_next_batch(0)
            train_step.run(feed_dict={
                x: batch[0],
                y_: batch[1],
                conv_keep_prob: this_conv_keep_prob
            })
            i += 1

        predicted, actual = [], []
        # Test in mini-batches until one test epoch has completed.
        while _test_epochs_completed == 0:
            test_batch = get_next_batch(2, 64)
            predicted.append(y_conv_softmax.eval(feed_dict={
                x: test_batch[0],
                y_: test_batch[1],
                conv_keep_prob: 1.0
            }))
            actual.append(test_batch[1])
        pred_test_labels = numpy.vstack(predicted)
        true_test_labels = numpy.vstack(actual)

        fpr, tpr, _ = roc_curve(true_test_labels[:, 0], pred_test_labels[:, 0])
        roc_auc = auc(fpr, tpr)
        print("%s, dataset %g, final test roc auc %g" %
              (_datasets[dataset_num], dataset_num, roc_auc))
        with open(file_name, 'a') as f:
            f.write(
                str(dataset_num) + ',' + _datasets[dataset_num] + ',' +
                str(roc_auc) + '\n')