Example #1
0
def main(_):
    """Build and initialize a small CNN graph for 14-bp one-hot DNA input.

    Architecture: conv (L_conv1 x 4, VALID) -> max-pool -> conv ->
    global max-pool -> linear FC -> softmax.  Mirrors the 101-bp examples
    elsewhere in this file, but for sequences of length 14.

    Relies on module-level names (tf, convolution2d, max_pool2d,
    fully_connected, flatten, sess) defined elsewhere in this file.
    """
    dropout_on = tf.placeholder(tf.float32)
    # NOTE(review): a placeholder object is never None, so the first branch
    # always runs; both branches assign the same value anyway.
    if dropout_on is not None:
        conv_keep_prob = 1.0
    else:
        conv_keep_prob = 1.0

    # Inputs: flattened 14-bp one-hot sequences and 2-class one-hot labels.
    x = tf.placeholder(tf.float32, shape=[None, 14 * 4])
    y_ = tf.placeholder(tf.float32, shape=[None, 2])

    # BUG FIX: the original called tf.reshape([-1, 14, 4, 1]) without the
    # input tensor; reshape x into an image-like 4-D tensor.
    x_image = tf.reshape(x, [-1, 14, 4, 1])

    # CONVOLUTIONAL LAYER(S)
    n_conv1 = 384  # TBD
    L_conv1 = 9  # TBD
    maxpool_len1 = 2
    conv1 = convolution2d(x_image,
                          n_conv1, [L_conv1, 4],
                          padding="VALID",
                          normalizer_fn=None)
    # BUG FIX: conv1_pool was referenced below but never defined; pool conv1
    # exactly as the other examples in this file do.
    conv1_pool = max_pool2d(conv1, [maxpool_len1, 1], [maxpool_len1, 1])
    conv1_pool_len = int((14 - L_conv1 + 1) / maxpool_len1)

    n_conv2 = n_conv1
    L_conv2 = 5
    maxpool_len2 = int(
        conv1_pool_len - L_conv2 +
        1)  # global maxpooling (max-pool across temporal domain)
    conv2 = convolution2d(conv1_pool,
                          n_conv2, [L_conv2, 1],
                          padding='VALID',
                          normalizer_fn=None)
    conv2_pool = max_pool2d(conv2, [maxpool_len2, 1], [maxpool_len2, 1])

    # LINEAR FC LAYER
    y_conv = fully_connected(flatten(conv2_pool), 2, activation_fn=None)
    y_conv_softmax = tf.nn.softmax(y_conv)

    # Pass logits/labels by keyword (consistent with the other examples in
    # this file); the positional order of this API changed across TF versions.
    cross_entropy = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=y_conv, labels=y_))
    train_step = tf.train.AdamOptimizer().minimize(cross_entropy)
    correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    sess.run(tf.initialize_all_variables())
Example #2
0
    def __init__(self, sequence_length, num_classes):
        """Build the sequence-classification CNN graph.

        Creates input/label/dropout placeholders, one conv + max-pool stage
        over a [-1, 14, 4, 1] one-hot "image", a linear FC layer, and the
        cross-entropy / accuracy ops exposed on self.
        """
        # Placeholders for input, output and dropout.
        self.input_x = tf.placeholder(tf.float32, [None, sequence_length],
                                      name="input_x")
        self.input_y = tf.placeholder(tf.float32, [None, num_classes],
                                      name="input_y")
        self.dropout_keep_prob = tf.placeholder(tf.float32,
                                                name="dropout_keep_prob")

        # Treat each sequence as a 14x4 one-hot image with a single channel.
        seq_image = tf.reshape(self.input_x, shape=[-1, 14, 4, 1])

        num_filters = 44
        filter_len = 5
        pool_len = 2
        conv_out = convolution2d(seq_image,
                                 num_filters, [filter_len, 4],
                                 padding='VALID',
                                 normalizer_fn=None)
        pooled = max_pool2d(conv_out, [pool_len, 1], [pool_len, 1])
        # NOTE(review): computed but never used, and it assumes a sequence
        # length of 101 while the reshape above uses 14 — confirm intent.
        pooled_len = int((101 - filter_len + 1) / pool_len)

        # Linear fully-connected layer producing 2-class logits.
        logits = fully_connected(flatten(pooled), 2, activation_fn=None)
        probs = tf.nn.softmax(logits)

        self.cross_entropy = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=logits,
                                                    labels=self.input_y))

        hits = tf.equal(tf.argmax(probs, 1), tf.argmax(self.input_y, 1))
        self.accuracy = tf.reduce_mean(tf.cast(hits, tf.float32))
Example #3
0
def main(_):
    """Train and evaluate a single-conv-layer CNN on each ENCODE K562 dataset.

    For every dataset: build the graph (conv -> global max-pool -> linear FC
    -> softmax), train for a fixed 7000 steps while logging validation ROC
    AUC every 1000 steps, then score the test split and append the final ROC
    AUC to a timestamped CSV file.

    Relies on module-level state and helpers defined elsewhere in this file
    (tf, sess, get_next_batch, load_ENCODE_k562_dataset, epoch counters).
    """
    global _train_epochs_completed
    global _validation_epochs_completed
    global _test_epochs_completed
    global _datasets
    global _validation_size
    global _test_labels

    dropout_on = tf.placeholder(tf.float32)
    # NOTE(review): a placeholder object is never None, so the first branch
    # always runs; both branches assign the same value anyway.
    if dropout_on is not None:
        conv_keep_prob = 1.0
    else:
        conv_keep_prob = 1.0

    # Results CSV, one row per dataset; create/clear it up front.
    file_name = 'out_' + str(int(time.time())) + '.csv'
    with open(file_name, 'w') as f:  # clear file
        f.write('dataset_num,dataset_name,roc_auc\n')
    for dataset_num in range(0, len(_datasets)):
        load_ENCODE_k562_dataset(dataset_num)

        # Inputs: flattened 101-bp one-hot sequences, 2-class one-hot labels.
        x = tf.placeholder(tf.float32, shape=[None, 101 * 4])
        y_ = tf.placeholder(tf.float32, shape=[None, 2])

        # Create the model
        x_image = tf.reshape(x, [-1, 101, 4, 1])

        # CONVOLUTIONAL LAYER(S)
        n_conv3 = 64
        L_conv3 = 9
        maxpool_len3 = int(101 - L_conv3 +
                           1)  # global maxpooling ("across temporal domain")
        conv3 = convolution2d(x_image,
                              n_conv3, [L_conv3, 4],
                              padding='VALID',
                              normalizer_fn=None)
        conv3_pool = max_pool2d(conv3, [maxpool_len3, 1], [maxpool_len3, 1])

        # LINEAR FC LAYER
        y_conv = fully_connected(flatten(conv3_pool), 2, activation_fn=None)
        y_conv_softmax = tf.nn.softmax(y_conv)

        # Pass logits/labels by keyword (consistent with the other examples
        # in this file); the positional order of this API changed across TF
        # versions.
        cross_entropy = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=y_conv, labels=y_))
        train_step = tf.train.AdamOptimizer().minimize(cross_entropy)
        correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
        sess.run(tf.initialize_all_variables())

        i = 0
        prev_auc = 0.0001  # small value to prevent DIV0
        stop_condition = None
        t0 = time.time()
        while stop_condition is None:
            # Every 1000 steps, score one full validation epoch and log AUC.
            if i % 1000 == 0:
                pred_validation_labels = None
                true_validation_labels = None
                prev_validation_epochs_completed = _validation_epochs_completed
                while _validation_epochs_completed - prev_validation_epochs_completed == 0:  # do in mini batches because single GTX970 has insufficient memory to test all at once
                    if _validation_size > 1024 * 5:
                        validation_batch = get_next_batch(1, 1024)
                    else:
                        validation_batch = get_next_batch(1, 64)
                    batch_preds = y_conv_softmax.eval(feed_dict={
                        x: validation_batch[0],
                        y_: validation_batch[1]
                    })
                    if pred_validation_labels is None:
                        pred_validation_labels = batch_preds
                        true_validation_labels = validation_batch[1]
                    else:
                        pred_validation_labels = numpy.vstack(
                            [pred_validation_labels, batch_preds])
                        true_validation_labels = numpy.vstack(
                            [true_validation_labels, validation_batch[1]])
                fpr, tpr, _ = roc_curve(true_validation_labels[:, 0],
                                        pred_validation_labels[:, 0])
                roc_auc = auc(fpr, tpr)
                # AUC-based early stopping is disabled; training stops at a
                # fixed step count below.  perc_chg_auc is logged only.
                perc_chg_auc = (roc_auc - prev_auc) / prev_auc
                prev_auc = roc_auc
                print(
                    "%s, dataset %g, epoch %d, step %d, time elapsed %g, validation roc auc %g, perc chg in auc %g"
                    % (_datasets[dataset_num], dataset_num,
                       _train_epochs_completed, i, time.time() - t0, roc_auc,
                       perc_chg_auc))
                t0 = time.time()
            batch = get_next_batch(0)
            train_step.run(feed_dict={
                x: batch[0],
                y_: batch[1],
                dropout_on: 1
            })
            if i == 7000:  # fixed-length training stop
                stop_condition = 1
            i += 1

        # Final evaluation on the test split, again in mini batches.
        pred_test_labels = None
        true_test_labels = None
        while _test_epochs_completed == 0:  # do testing in mini batches because single GTX970 has insufficient memory to test all at once
            test_batch = get_next_batch(2, 64)
            batch_preds = y_conv_softmax.eval(feed_dict={
                x: test_batch[0],
                y_: test_batch[1]
            })
            if pred_test_labels is None:
                pred_test_labels = batch_preds
                true_test_labels = test_batch[1]
            else:
                pred_test_labels = numpy.vstack(
                    [pred_test_labels, batch_preds])
                true_test_labels = numpy.vstack(
                    [true_test_labels, test_batch[1]])
        fpr, tpr, _ = roc_curve(true_test_labels[:, 0], pred_test_labels[:, 0])
        roc_auc = auc(fpr, tpr)
        print("%s, dataset %g, final test roc auc %g" %
              (_datasets[dataset_num], dataset_num, roc_auc))
        with open(file_name, 'a') as f:
            f.write(
                str(dataset_num) + ',' + _datasets[dataset_num] + ',' +
                str(roc_auc) + '\n')
Example #4
0
def main(_):
    """Train and evaluate a 2-conv-layer CNN on every ENCODE K562 dataset.

    Each dataset is processed in both variants (motif_occ 0 and 1): build
    the graph inside a fresh variable scope, train for a fixed 7000 steps
    (the per-step validation loop is switched off below), then score the
    test split in mini batches and append ROC/PRC AUC plus elapsed time to
    a timestamped CSV file.

    Relies on module-level state and helpers defined elsewhere in this file
    (tf, sess, get_next_batch, load_ENCODE_k562_dataset, epoch counters).
    """
    global _train_epochs_completed
    global _validation_epochs_completed
    global _test_epochs_completed
    global _datasets
    global _validation_size
    global _test_labels

    dropout_on = tf.placeholder(tf.float32)
    # NOTE(review): a placeholder object is never None, so the first branch
    # always runs; both branches assign the same value anyway.
    if dropout_on is not None:
        conv_keep_prob = 1.0
    else:
        conv_keep_prob = 1.0

    # Results CSV, one row per (dataset, variant); created/cleared up front.
    file_name = 'out_' + str(int(time.time())) + '.csv'
    f = open(file_name, 'w')  # clear file
    f.write(
        'dataset_num,motif_discovery=0|motif_occupancy=1,dataset_name,roc_auc,prc_auc,time(sec)\n'
    )
    f.close()
    for dataset_num in range(0, len(_datasets)):
        for motif_occ in range(0, 2):
            success = False
            try:
                load_ENCODE_k562_dataset(dataset_num, motif_occ)
                success = True
            except:
                # Best-effort: a missing/broken dataset is skipped, not fatal.
                print('Hmm.. Something happened. Skipping dataset ' +
                      _datasets[dataset_num])
            if success:
                # Fresh scope per run so variables of successive graphs don't
                # collide.
                with tf.variable_scope('scopename_' + str(dataset_num) + '_' +
                                       str(motif_occ)):
                    # LSTM Parameters ============================
                    lstm_n_hidden = 32  # hidden layer num features
                    # ============================================

                    # Flattened 101-bp one-hot input and 2-class labels.
                    x = tf.placeholder(tf.float32, shape=[None, 101 * 4])
                    y_ = tf.placeholder(tf.float32, shape=[None, 2])

                    # Create the model
                    x_image = tf.reshape(x, [-1, 101, 4, 1])

                    # CONVOLUTIONAL LAYER(S)
                    n_conv1 = 384
                    L_conv1 = 9
                    maxpool_len1 = 2
                    conv1 = convolution2d(x_image,
                                          n_conv1, [L_conv1, 4],
                                          padding='VALID',
                                          normalizer_fn=None)
                    conv1_pool = max_pool2d(conv1, [maxpool_len1, 1],
                                            [maxpool_len1, 1])
                    #conv1_drop = tf.nn.dropout(conv1_pool, conv_keep_prob)
                    conv1_pool_len = int((101 - L_conv1 + 1) / maxpool_len1)

                    n_conv2 = n_conv1
                    L_conv2 = 5
                    maxpool_len2 = int(
                        conv1_pool_len - L_conv2 + 1
                    )  # global maxpooling (max-pool across temporal domain)
                    conv2 = convolution2d(conv1_pool,
                                          n_conv2, [L_conv2, 1],
                                          padding='VALID',
                                          normalizer_fn=None)
                    conv2_pool = max_pool2d(conv2, [maxpool_len2, 1],
                                            [maxpool_len2, 1])
                    #conv2_drop = tf.nn.dropout(conv2_pool, conv_keep_prob)

                    # LINEAR FC LAYER
                    y_conv = fully_connected(flatten(conv2_pool),
                                             2,
                                             activation_fn=None)
                    y_conv_softmax = tf.nn.softmax(y_conv)

                    # NOTE(review): logits/labels are passed positionally
                    # here; the other examples in this file use explicit
                    # logits=/labels= keywords — prefer that form, since the
                    # positional order of this API changed across TF versions.
                    cross_entropy = tf.reduce_mean(
                        tf.nn.softmax_cross_entropy_with_logits(y_conv, y_))
                    train_step = tf.train.AdamOptimizer().minimize(
                        cross_entropy)
                    correct_prediction = tf.equal(tf.argmax(y_conv, 1),
                                                  tf.argmax(y_, 1))
                    accuracy = tf.reduce_mean(
                        tf.cast(correct_prediction, tf.float32))
                    sess.run(tf.initialize_all_variables())

                    i = 0
                    prev_auc = 0.0001  # small value to prevent DIV0
                    stop_condition = None
                    t0 = time.time()
                    while stop_condition is None:
                        #if i%100 == 0:
                        # The periodic validation-AUC pass is disabled; the
                        # loop trains for a fixed number of steps instead.
                        if 1 == 0:  # turned off
                            #t0 = time.time()
                            pred_validation_labels = None
                            true_validation_labels = None
                            prev_validation_epochs_completed = _validation_epochs_completed
                            while _validation_epochs_completed - prev_validation_epochs_completed == 0:  # do in mini batches because single GTX970 has insufficient memory to test all at once
                                if _validation_size > 1024 * 5:
                                    validation_batch = get_next_batch(1, 1024)
                                else:
                                    validation_batch = get_next_batch(1, 64)
                                if pred_validation_labels is None:
                                    pred_validation_labels = y_conv_softmax.eval(
                                        feed_dict={
                                            x: validation_batch[0],
                                            y_: validation_batch[1]
                                        })
                                    true_validation_labels = validation_batch[
                                        1]
                                else:
                                    pred_validation_labels = numpy.vstack([
                                        pred_validation_labels,
                                        y_conv_softmax.eval(
                                            feed_dict={
                                                x: validation_batch[0],
                                                y_: validation_batch[1]
                                            })
                                    ])
                                    true_validation_labels = numpy.vstack([
                                        true_validation_labels,
                                        validation_batch[1]
                                    ])
                            fpr, tpr, _ = roc_curve(
                                true_validation_labels[:, 0],
                                pred_validation_labels[:, 0])
                            roc_auc = auc(fpr, tpr)
                            #check stop condition:
                            perc_chg_auc = (roc_auc - prev_auc) / prev_auc
                            #if perc_chg_auc < 0.005: # stop when auc moving average on validation set changes by <0.5%
                            #    stop_condition = 1
                            prev_auc = roc_auc
                            print(
                                "%s, dataset %g, epoch %d, step %d, time elapsed %g, validation roc auc %g, perc chg in auc %g"
                                % (_datasets[dataset_num],
                                   dataset_num, _train_epochs_completed, i,
                                   time.time() - t0, roc_auc, perc_chg_auc))
                            t0 = time.time()
                        batch = get_next_batch(0)
                        train_step.run(feed_dict={
                            x: batch[0],
                            y_: batch[1],
                            dropout_on: 1
                        })
                        if i == 7000:
                            stop_condition = 1
                        i += 1

                    # Final evaluation on the test split, in mini batches.
                    pred_test_labels = None
                    true_test_labels = None
                    while _test_epochs_completed == 0:  # do testing in mini batches because single GTX970 has insufficient memory to test all at once
                        test_batch = get_next_batch(2, 64)
                        if pred_test_labels is None:
                            pred_test_labels = y_conv_softmax.eval(
                                feed_dict={
                                    x: test_batch[0],
                                    y_: test_batch[1]
                                })
                            true_test_labels = test_batch[1]
                        else:
                            pred_test_labels = numpy.vstack([
                                pred_test_labels,
                                y_conv_softmax.eval(feed_dict={
                                    x: test_batch[0],
                                    y_: test_batch[1]
                                })
                            ])
                            true_test_labels = numpy.vstack(
                                [true_test_labels, test_batch[1]])
                    fpr, tpr, _ = roc_curve(
                        true_test_labels[:, 0], pred_test_labels[:, 0]
                    )  # get receiver operating characteristics
                    precision, recall, _ = precision_recall_curve(
                        true_test_labels[:, 0],
                        pred_test_labels[:, 0])  # get precision recall curve
                    roc_auc = auc(fpr, tpr)
                    prc_auc = auc(recall, precision)
                    print(
                        "%s, dataset %g, final test roc auc %g, final test prc auc %g, time elapsed %g seconds"
                        % (_datasets[dataset_num], dataset_num, roc_auc,
                           prc_auc, time.time() - t0))
                    f = open(file_name, 'a')
                    f.write(
                        str(dataset_num) + ',' + str(motif_occ) + ',' +
                        _datasets[dataset_num] + ',' + str(roc_auc) + ',' +
                        str(prc_auc) + ',' + str(time.time() - t0) + '\n')
                    f.close()
                    t0 = time.time()
Example #5
0
def main(_):
    """Train and evaluate a conv + bidirectional-LSTM model per dataset.

    Starting at dataset index 106 (presumably resuming an earlier run —
    TODO confirm), each dataset is processed in both variants (motif_occ 0
    and 1): one conv layer feeds a BiLSTM whose per-step outputs are
    averaged, then a linear FC layer produces 2-class logits.  Training
    runs a fixed 4800 steps; final test ROC/PRC AUC is appended to a CSV.

    Relies on module-level state and helpers defined elsewhere in this file
    (tf, rnn_cell, sess, get_next_batch, load_ENCODE_k562_dataset, counters).
    """
    global _train_epochs_completed
    global _validation_epochs_completed
    global _test_epochs_completed
    global _datasets
    global _validation_size
    global _test_labels

    dropout_on = tf.placeholder(tf.float32)
    # NOTE(review): a placeholder object is never None, so the first branch
    # always runs; both branches assign the same value anyway, making the
    # dropout below a no-op (keep_prob 1.0).
    if dropout_on is not None:
        rnn_keep_prob = 1.0
    else:
        rnn_keep_prob = 1.0

    # Results CSV, one row per (dataset, variant); created/cleared up front.
    file_name = 'out_' + str(int(time.time())) + '.csv'
    f=open(file_name,'w') # clear file
    f.write('dataset_num,motif_discovery=0|motif_occupancy=1,dataset_name,roc_auc,prc_auc,time(sec)\n')
    f.close()
    for dataset_num in range(106, len(_datasets)):
        for motif_occ in range(0,2):
            success = False
            try:
                load_ENCODE_k562_dataset(dataset_num,motif_occ)
                success = True
            except:
                # Best-effort: a missing/broken dataset is skipped, not fatal.
                print('Hmm.. Something happened. Skipping dataset ' + _datasets[dataset_num])
            if success:
                # Fresh scope per run so variables of successive graphs don't
                # collide.
                with tf.variable_scope('scopename_' + str(dataset_num) + '_' + str(motif_occ)):
                    # LSTM Parameters ============================
                    lstm_n_hidden = 32 # hidden layer num features
                    # ============================================

                    # Flattened 101-bp one-hot input and 2-class labels.
                    x = tf.placeholder(tf.float32, shape=[None, 101*4])
                    y_ = tf.placeholder(tf.float32, shape=[None, 2])

                    # Create the model
                    x_image = tf.reshape(x, [-1,101,4,1])

                    # CONVOLUTIONAL LAYER(S)
                    n_conv1 = 128
                    L_conv1 = 9
                    n_steps1 = (101-L_conv1+1)  # temporal steps after VALID conv = 93
                    conv1 = convolution2d(x_image, n_conv1, [L_conv1,4], padding='VALID', normalizer_fn=None)
                    conv1_resh = tf.reshape(conv1, [-1,n_steps1,n_conv1])

                    # LSTM LAYER(S)
                    # NOTE(review): tf.unpack / tf.nn.bidirectional_rnn are
                    # old TF APIs (later renamed) — this snippet targets an
                    # early TF release.
                    conv1_unpacked = tf.unpack(conv1_resh, axis=1) # this func does it all for us :)
                    lstm_fw_cell = rnn_cell.BasicLSTMCell(lstm_n_hidden)
                    lstm_bw_cell = rnn_cell.BasicLSTMCell(lstm_n_hidden)
                    birnn_out,_,_ = tf.nn.bidirectional_rnn(lstm_fw_cell, lstm_bw_cell, conv1_unpacked, dtype=tf.float32)

                    # Linear activation
                    # rnn_out = birnn_out[-1] # to use LAST of the rnn inner loops (as in the MNIST example)
                    # NOTE(review): divides by 101 although birnn_out has only
                    # n_steps1 (= 93) elements, so this is a scaled sum rather
                    # than a true mean — confirm whether that is intended.
                    rnn_out = tf.div(tf.add_n(birnn_out), 101) # to use the AVERAGE of the rnn inner loops
                    rnn_out_drop = tf.nn.dropout(rnn_out, rnn_keep_prob) # apply dropout to regularize the LSTM
                    pred = fully_connected(flatten(rnn_out_drop), 2, activation_fn=None)
                    pred_softmax = tf.nn.softmax(pred)

                    # NOTE(review): logits/labels are passed positionally
                    # here; the other examples in this file use explicit
                    # logits=/labels= keywords — prefer that form.
                    cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(pred, y_))
                    train_step = tf.train.AdamOptimizer().minimize(cross_entropy)
                    sess.run(tf.initialize_all_variables())

                    i = 0
                    prev_auc = 0.0001 # small value to prevent DIV0
                    stop_condition = None
                    t0 = time.time()
                    while stop_condition is None:
                        #if i%100 == 0:
                        # The periodic validation-AUC pass is disabled; the
                        # loop trains for a fixed number of steps instead.
                        if 1 == 0: # turned off
                            #t0 = time.time()
                            pred_validation_labels = None
                            true_validation_labels = None
                            prev_validation_epochs_completed = _validation_epochs_completed
                            while _validation_epochs_completed - prev_validation_epochs_completed == 0: # do in mini batches because single GTX970 has insufficient memory to test all at once
                                if _validation_size > 1024*5:
                                    validation_batch = get_next_batch(1,1024)
                                else:
                                    validation_batch = get_next_batch(1,64)
                                if pred_validation_labels is None:
                                    pred_validation_labels = pred_softmax.eval(feed_dict={x: validation_batch[0], y_: validation_batch[1]})
                                    true_validation_labels = validation_batch[1]
                                else:
                                    pred_validation_labels = numpy.vstack([pred_validation_labels, pred_softmax.eval(feed_dict={x: validation_batch[0], y_: validation_batch[1]})])
                                    true_validation_labels = numpy.vstack([true_validation_labels, validation_batch[1]])
                            fpr, tpr, _ = roc_curve(true_validation_labels[:,0], pred_validation_labels[:,0])
                            roc_auc = auc(fpr, tpr)
                            #check stop condition:
                            perc_chg_auc = (roc_auc - prev_auc) / prev_auc
                            #if perc_chg_auc < 0.005: # stop when auc moving average on validation set changes by <0.5%
                            #    stop_condition = 1
                            prev_auc = roc_auc
                            print("%s, dataset %g, epoch %d, step %d, time elapsed %g, validation roc auc %g, perc chg in auc %g"%(_datasets[dataset_num], dataset_num, _train_epochs_completed, i, time.time()-t0, roc_auc, perc_chg_auc))
                            t0 = time.time()
                        batch = get_next_batch(0)
                        train_step.run(feed_dict={x: batch[0], y_: batch[1], dropout_on: 1})
                        if i == 4800:
                            stop_condition = 1
                        i += 1

                    # Final evaluation on the test split, in mini batches.
                    pred_test_labels = None
                    true_test_labels = None
                    while _test_epochs_completed == 0: # do testing in mini batches because single GTX970 has insufficient memory to test all at once
                        test_batch = get_next_batch(2, 64)
                        if pred_test_labels is None:
                            pred_test_labels = pred_softmax.eval(feed_dict={x: test_batch[0], y_: test_batch[1]})
                            true_test_labels = test_batch[1]
                        else:
                            pred_test_labels = numpy.vstack([pred_test_labels, pred_softmax.eval(feed_dict={x: test_batch[0], y_: test_batch[1]})])
                            true_test_labels = numpy.vstack([true_test_labels, test_batch[1]])
                    fpr, tpr, _ = roc_curve(true_test_labels[:,0], pred_test_labels[:,0]) # get receiver operating characteristics
                    precision, recall, _ = precision_recall_curve(true_test_labels[:,0], pred_test_labels[:,0]) # get precision recall curve
                    roc_auc = auc(fpr, tpr)
                    prc_auc = auc(recall, precision)
                    print("%s, dataset %g, final test roc auc %g, final test prc auc %g, time elapsed %g seconds"%(_datasets[dataset_num], dataset_num, roc_auc, prc_auc, time.time()-t0))
                    f=open(file_name,'a')
                    f.write(str(dataset_num) + ',' + str(motif_occ) + ',' + _datasets[dataset_num] + ',' + str(roc_auc) + ',' + str(prc_auc) + ',' + str(time.time()-t0) + '\n')
                    f.close()
                    t0 = time.time()
Example #6
0
def main(_):
    """Train a 2-conv-layer CNN with scheduled dropout on each dataset.

    Unlike the other examples in this file, dropout is actually applied here
    via the conv_keep_prob placeholder: training starts at keep-prob 0.5 and
    raises it by 0.2 (capped at 0.75) at steps 3000/10000/20000; once the cap
    is reached at one of those checkpoints, training stops.  Validation ROC
    AUC is logged every 1000 steps; the final test ROC AUC goes to a CSV.

    Relies on module-level state and helpers defined elsewhere in this file
    (tf, sess, utils, get_next_batch, load_ENCODE_k562_dataset, counters).
    """
    global _train_epochs_completed
    global _validation_epochs_completed
    global _test_epochs_completed
    global _datasets
    global _validation_size
    global _test_labels

    # Results CSV, one row per dataset; created/cleared up front.
    file_name = 'out_' + str(int(time.time())) + '.csv'
    f = open(file_name, 'w')  # clear file
    f.write('dataset_num,dataset_name,roc_auc\n')
    f.close()
    _datasets = utils.remove_non_existing_datafiles(_datasets)
    for dataset_num in range(0, len(_datasets)):
        load_ENCODE_k562_dataset(dataset_num)

        # Flattened 101-bp one-hot input, 2-class labels, and the dropout
        # keep-probability (fed per step; 1.0 during evaluation).
        x = tf.placeholder(tf.float32, shape=[None, 101 * 4])
        y_ = tf.placeholder(tf.float32, shape=[None, 2])
        conv_keep_prob = tf.placeholder(tf.float32)

        # Create the model
        x_image = tf.reshape(x, [-1, 101, 4, 1])

        # CONVOLUTIONAL LAYER(S)
        n_conv1 = 64
        L_conv1 = 9
        maxpool_len1 = 2
        conv1 = convolution2d(x_image,
                              n_conv1, [L_conv1, 4],
                              padding='VALID',
                              normalizer_fn=None)
        conv1_pool = max_pool2d(conv1, [maxpool_len1, 1], [maxpool_len1, 1])
        conv1_drop = tf.nn.dropout(conv1_pool, conv_keep_prob)
        conv1_pool_len = int((101 - L_conv1 + 1) / maxpool_len1)

        n_conv2 = n_conv1
        L_conv2 = 5
        maxpool_len2 = int(
            conv1_pool_len - L_conv2 +
            1)  # global maxpooling (max-pool across temporal domain)
        conv2 = convolution2d(conv1_drop,
                              n_conv2, [L_conv2, 1],
                              padding='VALID',
                              normalizer_fn=None)
        conv2_pool = max_pool2d(conv2, [maxpool_len2, 1], [maxpool_len2, 1])
        conv2_drop = tf.nn.dropout(conv2_pool, conv_keep_prob)

        # LINEAR FC LAYER
        y_conv = fully_connected(flatten(conv2_drop), 2, activation_fn=None)
        y_conv_softmax = tf.nn.softmax(y_conv)

        cross_entropy = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=y_conv, labels=y_))
        train_step = tf.train.AdamOptimizer().minimize(cross_entropy)
        correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
        sess.run(tf.initialize_all_variables())

        i = 0
        prev_auc = 0.0001  # small value to prevent DIV0
        prev_train_epochs_compl = 0
        stop_condition = None
        t0 = time.time()
        # Keep-prob schedule: start aggressive (0.5), relax toward 0.75.
        this_conv_keep_prob = 0.5
        final_keep_prob = 0.75
        while stop_condition is None:
            # Every 1000 steps, score one full validation epoch and log AUC.
            if i % 1000 == 0:
                #t0 = time.time()
                pred_validation_labels = None
                true_validation_labels = None
                prev_validation_epochs_completed = _validation_epochs_completed
                while _validation_epochs_completed - prev_validation_epochs_completed == 0:  # do in mini batches because single GTX970 has insufficient memory to test all at once
                    if _validation_size > 1024 * 5:
                        validation_batch = get_next_batch(1, 1024)
                    else:
                        validation_batch = get_next_batch(1, 64)
                    if pred_validation_labels is None:
                        pred_validation_labels = y_conv_softmax.eval(
                            feed_dict={
                                x: validation_batch[0],
                                y_: validation_batch[1],
                                conv_keep_prob: 1.0
                            })
                        true_validation_labels = validation_batch[1]
                    else:
                        pred_validation_labels = numpy.vstack([
                            pred_validation_labels,
                            y_conv_softmax.eval(
                                feed_dict={
                                    x: validation_batch[0],
                                    y_: validation_batch[1],
                                    conv_keep_prob: 1.0
                                })
                        ])
                        true_validation_labels = numpy.vstack(
                            [true_validation_labels, validation_batch[1]])
                fpr, tpr, _ = roc_curve(true_validation_labels[:, 0],
                                        pred_validation_labels[:, 0])
                roc_auc = auc(fpr, tpr)
                perc_chg_auc = (roc_auc - prev_auc) / prev_auc
                print(
                    "%s, dataset %g, epoch %d, step %d, time elapsed %g, validation roc auc %g, perc chg in auc %g, conv_keep_prob %g"
                    % (_datasets[dataset_num], dataset_num,
                       _train_epochs_completed, i, time.time() - t0, roc_auc,
                       perc_chg_auc, this_conv_keep_prob))
                #check stop condition:
                # Raise keep-prob at these checkpoints; once it has reached
                # final_keep_prob, the NEXT checkpoint stops training.
                if i == 3000 or i == 10000 or i == 20000:  # increase keep_prob at these iteration numbers (not epochs)
                    if this_conv_keep_prob < final_keep_prob:
                        this_conv_keep_prob += 0.2
                        if this_conv_keep_prob > final_keep_prob:
                            this_conv_keep_prob = final_keep_prob
                    else:
                        stop_condition = 1
                prev_train_epochs_compl = _train_epochs_completed
                prev_auc = roc_auc
                t0 = time.time()
            batch = get_next_batch(0)
            train_step.run(feed_dict={
                x: batch[0],
                y_: batch[1],
                conv_keep_prob: this_conv_keep_prob
            })
            i += 1

        # Final evaluation on the test split, in mini batches.
        pred_test_labels = None
        true_test_labels = None
        while _test_epochs_completed == 0:  # do testing in mini batches because single GTX970 has insufficient memory to test all at once
            test_batch = get_next_batch(2, 64)
            if pred_test_labels is None:
                pred_test_labels = y_conv_softmax.eval(feed_dict={
                    x: test_batch[0],
                    y_: test_batch[1],
                    conv_keep_prob: 1.0
                })
                true_test_labels = test_batch[1]
            else:
                pred_test_labels = numpy.vstack([
                    pred_test_labels,
                    y_conv_softmax.eval(feed_dict={
                        x: test_batch[0],
                        y_: test_batch[1],
                        conv_keep_prob: 1.0
                    })
                ])
                true_test_labels = numpy.vstack(
                    [true_test_labels, test_batch[1]])
        fpr, tpr, _ = roc_curve(true_test_labels[:, 0], pred_test_labels[:, 0])
        roc_auc = auc(fpr, tpr)
        print("%s, dataset %g, final test roc auc %g" %
              (_datasets[dataset_num], dataset_num, roc_auc))
        f = open(file_name, 'a')
        f.write(
            str(dataset_num) + ',' + _datasets[dataset_num] + ',' +
            str(roc_auc) + '\n')
        f.close()