Example #1
    def output_layer(self, G, M, dropout):
        # M: (?, m, 2h)
        # the softmax is applied inside the loss function, so this layer returns raw logits
        with tf.variable_scope('output_layer'):
            w_1 = tf.get_variable('w_start', shape=(10 * self.hidden_size, 1),
                initializer=tf.contrib.layers.xavier_initializer())
            w_2 = tf.get_variable('w_end', shape=(10 * self.hidden_size, 1),
                initializer=tf.contrib.layers.xavier_initializer())

            if self.summary_flag:
                variable_summaries(w_1, "output_w_1")
                variable_summaries(w_2, "output_w_2")

            self.batch_size = tf.shape(M)[0]

            temp = tf.concat([G, M], axis=2)  # (?, m, 10h)
            temp_1_o = tf.nn.dropout(temp, dropout)  # two independent dropout masks
            temp_2_o = tf.nn.dropout(temp, dropout)

            w_1_tiled = tf.tile(tf.expand_dims(w_1, 0), [self.batch_size, 1, 1])
            w_2_tiled = tf.tile(tf.expand_dims(w_2, 0), [self.batch_size, 1, 1])

            h_1 = tf.squeeze(tf.einsum('aij,ajk->aik', temp_1_o, w_1_tiled), axis=2)  # (?, m, 10h) * (?, 10h, 1) -> (?, m)
            h_2 = tf.squeeze(tf.einsum('aij,ajk->aik', temp_2_o, w_2_tiled), axis=2)  # (?, m, 10h) * (?, 10h, 1) -> (?, m)
            return h_1, h_2
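Every example on this page calls a variable_summaries helper that the snippets themselves never define. A minimal sketch of the two-argument form used above, modeled on the helper from the TensorFlow 1.x summaries tutorial (the scope layout and tag names here are assumptions; Examples #2, #3, and #13 use a one-argument variant that presumably derives the tag from the tensor's name):

def variable_summaries(var, name):
    # Attach mean/stddev/min/max scalars and a histogram to a tensor.
    with tf.name_scope('summaries'):
        mean = tf.reduce_mean(var)
        tf.summary.scalar('mean/' + name, mean)
        stddev = tf.sqrt(tf.reduce_mean(tf.square(var - mean)))
        tf.summary.scalar('stddev/' + name, stddev)
        tf.summary.scalar('max/' + name, tf.reduce_max(var))
        tf.summary.scalar('min/' + name, tf.reduce_min(var))
        tf.summary.histogram(name, var)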
Example #2
def optimize(loss_op):
    global_step = tf.Variable(0, name='global_step', trainable=False)
    learning_rate = tf.train.exponential_decay(LEARNING_RATE, global_step,
                                               LR_DECAY_STEPS, LR_DECAY, staircase=True)
    optimizer = tf.train.AdamOptimizer(learning_rate)
    grads_and_vars = optimizer.compute_gradients(loss_op)
    for grad, trainable_var in grads_and_vars:
        # compute_gradients returns None for variables the loss does not depend on
        if grad is not None:
            variable_summaries(grad)
        variable_summaries(trainable_var)
    return global_step, optimizer.apply_gradients(grads_and_vars=grads_and_vars, global_step=global_step)
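A hypothetical wiring of optimize() into a training loop; loss_op, sess, feed, and NUM_STEPS are assumptions:

global_step, train_op = optimize(loss_op)
sess.run(tf.global_variables_initializer())
for _ in range(NUM_STEPS):
    _, step = sess.run([train_op, global_step], feed_dict=feed)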
Example #3
def evaluation(logits, labels):
    predict_floats = tf.round(tf.nn.sigmoid(logits), name="predictions")
    variable_summaries(predict_floats)
    label_floats = tf.cast(labels, tf.float32)

    accuracy = tf.reduce_mean(tf.cast(tf.equal(predict_floats, label_floats), tf.float32))
    tf.scalar_summary("accuracy", accuracy)
    auc, update_auc = tf.contrib.metrics.streaming_auc(predict_floats, label_floats)
    tf.scalar_summary("auc", auc)

    return accuracy, auc, update_auc
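Because tf.contrib.metrics.streaming_auc accumulates its statistic in local variables, an evaluation pass needs them initialized and update_auc run per batch. A sketch under those assumptions (sess and eval_batches are hypothetical):

accuracy, auc, update_auc = evaluation(logits, labels)
sess.run(tf.local_variables_initializer())  # creates the streaming AUC accumulators
for feed in eval_batches:
    sess.run([accuracy, update_auc], feed_dict=feed)
auc_value = sess.run(auc)  # final AUC over all evaluated batches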
Example #4
def output_layer(inputs, weight, bias):
    # inputs are the GRU outputs: a pair of tensors of shape [batch_size, step, num_units]
    output = inputs[0] + inputs[1]  # sum the forward and backward outputs
    output = tf.reshape(output, [-1, HIDDEN_NODES])  # [batch_size * step, HIDDEN_NODES]

    result = tf.matmul(output, weight) + bias
    y = tf.nn.softmax(result, name="softmax")
    util.variable_summaries(weight)
    util.variable_summaries(bias)

    return y
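A hypothetical construction of the weight and bias this layer expects; N_CLASSES is an assumed constant:

weight = tf.Variable(tf.truncated_normal([HIDDEN_NODES, N_CLASSES], stddev=0.1))
bias = tf.Variable(tf.zeros([N_CLASSES]))
y = output_layer((out_fw, out_bw), weight, bias)  # the two tensors from a bidirectional RNN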
Example #5
    def make_summaries(self):
        variable_summaries(self.W_hidden, "W_hidden")
        variable_summaries(self.b_hidden, "b_hidden")
        variable_summaries(self.W_output, "W_output")
        variable_summaries(self.b_output, "b_output")

        tf.summary.histogram('Q', self.output)
Example #6
    def filter_layer(self, question, context):
        with tf.variable_scope('filter') as scope:
            w_f = tf.get_variable(
                'w_filter',
                shape=(self.max_question_len, 1),
                initializer=tf.contrib.layers.xavier_initializer())

            if self.summary_flag:
                variable_summaries(w_f, "filter_layer_weights")

            self.batch_size = tf.shape(question)[0]
            w_f_tiled = tf.tile(tf.expand_dims(w_f, 0),
                                [self.batch_size, 1, 1])
            cosine_sim = self._cosine_similarity(question,
                                                 context)  # (?, n, m)
            relevance = tf.einsum(
                'aij,aik->ajk', cosine_sim,
                w_f_tiled)  # (?, n, m) * (?, n, 1) => (?, m, 1)

        return context * relevance
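The _cosine_similarity helper isn't shown. Given the (?, n, m) result it presumably L2-normalizes each time step along the hidden axis and takes pairwise dot products; a minimal sketch, assuming question is (?, n, d) and context is (?, m, d):

    def _cosine_similarity(self, question, context):
        q_norm = tf.nn.l2_normalize(question, dim=2)  # unit-length vectors per time step
        c_norm = tf.nn.l2_normalize(context, dim=2)
        return tf.einsum('aid,ajd->aij', q_norm, c_norm)  # (?, n, m)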
Example #7
    def bilinear_similarity(self, y_q, y_c):
        # y_q: (?, 2h, n)
        # y_c: (?, 2h, m)
        # S : (?, n, m)
        with tf.variable_scope('similarity') as scope:
            self.batch_size = tf.shape(y_c)[0]
            w_alpha = tf.get_variable(
                'w_alpha',
                shape=(2 * self.hidden_size, 2 * self.hidden_size),
                initializer=tf.contrib.layers.xavier_initializer())

            if self.summary_flag:
                variable_summaries(w_alpha, "bilinear_w_alpha")
            w_alpha_tiled = tf.tile(tf.expand_dims(w_alpha, 0),
                                    [self.batch_size, 1, 1])
            y_q_T = tf.transpose(y_q, perm=[0, 2, 1])  # U_T: (?, n, 2h)
            bi_S_temp = tf.einsum(
                'aij,ajk->aik', y_q_T,
                w_alpha_tiled)  # (?, n, 2h) * (?, 2h, 2h) = (?, n, 2h)
            S = tf.einsum('aij,ajk->aik', bi_S_temp,
                          y_c)  # (?, n, 2h) * (?, 2h, m) = (?, n, m)
        return S
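The tiling of w_alpha can likely be dropped: tf.einsum accepts a rank-2 operand shared across the batch, so an equivalent sketch is

    bi_S_temp = tf.einsum('aij,jk->aik', y_q_T, w_alpha)  # (?, n, 2h) * (2h, 2h) = (?, n, 2h)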
Example #8
def bidi_gru(X):
    # First bidirectional GRU layer
    l1_f_cell = tf.nn.rnn_cell.GRUCell(HIDDEN_NODES, name="l1_f_gru_cell", activation=maxout_activator)
    l1_b_cell = tf.nn.rnn_cell.GRUCell(HIDDEN_NODES, name="l1_b_gru_cell", activation=maxout_activator)

    l1_f_state = l1_f_cell.zero_state(BATCH_SIZE, dtype=tf.float32)
    l1_b_state = l1_b_cell.zero_state(BATCH_SIZE, dtype=tf.float32)

    # result_1 is a pair of pairs: (outputs, states), each with forward and backward parts
    result_1 = tf.nn.bidirectional_dynamic_rnn(l1_f_cell, l1_b_cell, X, initial_state_fw=l1_f_state, initial_state_bw=l1_b_state)

    # Second bidirectional GRU layer
    l2_f_cell = tf.nn.rnn_cell.GRUCell(HIDDEN_NODES, name="l2_f_gru_cell", activation=maxout_activator)
    l2_b_cell = tf.nn.rnn_cell.GRUCell(HIDDEN_NODES, name="l2_b_gru_cell", activation=maxout_activator)

    l2_f_state = l2_f_cell.zero_state(BATCH_SIZE, dtype=tf.float32)
    l2_b_state = l2_b_cell.zero_state(BATCH_SIZE, dtype=tf.float32)

    # outputs: a tuple (output_fw, output_bw)
    #l1_output_fw = result_1[0][0]
    #l1_output_bw = result_1[0][1]

    #l2_input = result_1[0][0] + result_1[0][1] #tf.concat(result_1[0], 2)

    # the second layer runs each direction's outputs through its own forward GRU
    result_2_f = tf.nn.dynamic_rnn(l2_f_cell, result_1[0][0], initial_state=l2_f_state)
    result_2_b = tf.nn.dynamic_rnn(l2_b_cell, result_1[0][1], initial_state=l2_b_state)

    #result_2 = tf.nn.bidirectional_dynamic_rnn(l2_f_cell, l2_b_cell, l2_input, initial_state_fw=l2_f_state, initial_state_bw=l2_b_state)

    util.variable_summaries(l1_f_state)
    util.variable_summaries(l1_b_state)
    util.variable_summaries(l2_f_state)
    util.variable_summaries(l2_b_state)
    # return only the outputs
    return (result_2_f[0], result_2_b[0])
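A hypothetical call, using the module-level BATCH_SIZE, HIDDEN_NODES, EMBEDDING_DIMS, and maxout_activator names the snippet assumes:

X = tf.placeholder(tf.float32, shape=[BATCH_SIZE, None, EMBEDDING_DIMS])
out_fw, out_bw = bidi_gru(X)  # each of shape (BATCH_SIZE, steps, HIDDEN_NODES)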
Example #9
    def similarity(self, y_q, y_c):
        # y_q: (?, 2h, n)
        # y_c: (?, 2h, m)
        # S : (?, n, m)
        with tf.variable_scope('similarity') as scope:
            w_s1 = tf.get_variable(
                'w_sim_1',
                shape=(2 * self.hidden_size, 1),
                initializer=tf.contrib.layers.xavier_initializer())
            w_s2 = tf.get_variable(
                'w_sim_2',
                shape=(2 * self.hidden_size, 1),
                initializer=tf.contrib.layers.xavier_initializer())
            w_s3 = tf.get_variable(
                'w_sim_3',
                shape=(2 * self.hidden_size, 1),
                initializer=tf.contrib.layers.xavier_initializer())

            if self.summary_flag:
                variable_summaries(w_s1, "w_sim_1")
                variable_summaries(w_s2, "w_sim_2")
                variable_summaries(w_s3, "w_sim_3")

            self.batch_size = tf.shape(y_c)[0]

            w_s1_tiled = tf.tile(tf.expand_dims(w_s1, 0),
                                 [self.batch_size, 1, 1])
            w_s2_tiled = tf.tile(tf.expand_dims(w_s2, 0),
                                 [self.batch_size, 1, 1])
            S_h = tf.einsum('aji,ajk->aki', y_c,
                            w_s1_tiled)  # (?, 2h, m) * (?, 2h, 1) => (?, 1, m)
            S_u = tf.einsum('aji,ajk->aik', y_q,
                            w_s2_tiled)  # (?, 2h, n) * (?, 2h, 1) => (?, n, 1)

            S_h_tiled = tf.tile(
                S_h, [1, self.max_question_len, 1])  # (?, 1, m) => (?, n, m)
            S_u_tiled = tf.tile(
                S_u, [1, 1, self.max_context_len])  # (?, n, 1) => (?, n, m)
            S_cov = tf.einsum('aij,aik->ajk', y_q, y_c *
                              w_s3)  # (?, 2h, n) * (?, 2h, m) => (?, n, m)
            S = S_cov + S_h_tiled + S_u_tiled
        return S
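Written out per entry, this is the BiDAF-style trilinear similarity

    S_{ij} = w_1^T c_j + w_2^T q_i + w_3^T (q_i \circ c_j)

where q_i is the i-th 2h-dimensional question column, c_j the j-th context column, and \circ the elementwise product; S_h_tiled, S_u_tiled, and S_cov contribute the three terms respectively.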
Example #10
def embedding(embed_before_v, embed_before_v_sd, embed_before_v_avg, weights, biases):
    # embed_v shape = [batch_size * step, 50]
    embed_v = tf.matmul(embed_before_v, weights["embed_weight_V"], name="embed_v_matmul_embed_before_v") + biases["embed_biases_V"]
    # embed_v_sd shape = [batch_size * step, 50]
    embed_v_sd = tf.matmul(embed_before_v_sd, weights["embed_weight_Vsd"], name="embed_v_sd_matmul_embed_before_v_sd") + biases["embed_biases_Vsd"]
    # embed_v_avg shape = [batch_size * step, 50]
    embed_v_avg = tf.matmul(embed_before_v_avg, weights["embed_weight_Vavg"], name="embed_v_avg_matmul_embed_before_v_avg") + biases["embed_biases_Vavg"]

    # first add v and Vsd
    #result = embed_v + embed_v_sd

    util.variable_summaries(embed_v)
    util.variable_summaries(embed_v_sd)
    util.variable_summaries(embed_v_avg)

    result = tf.add_n([embed_v, embed_v_sd, embed_v_avg])

    result = tf.reshape(result, [-1, HIDDEN_NODES, EMBEDDING_DIMS])
    return result
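A hypothetical, shape-consistent construction of the weights/biases dicts this function expects; the input widths V_DIM, VSD_DIM, and VAVG_DIM are assumptions (the 50-unit output comes from the comments above):

weights = {
    "embed_weight_V": tf.Variable(tf.truncated_normal([V_DIM, 50], stddev=0.1)),
    "embed_weight_Vsd": tf.Variable(tf.truncated_normal([VSD_DIM, 50], stddev=0.1)),
    "embed_weight_Vavg": tf.Variable(tf.truncated_normal([VAVG_DIM, 50], stddev=0.1)),
}
biases = {
    "embed_biases_V": tf.Variable(tf.zeros([50])),
    "embed_biases_Vsd": tf.Variable(tf.zeros([50])),
    "embed_biases_Vavg": tf.Variable(tf.zeros([50])),
}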
Example #11
def build_model(features,
                labels,
                hidden,
                learn_rate=0.1,
                beta=0.01,
                beta_out=0.01,
                model_dir=None):
    '''Build a linear classifier with fully-connected hidden layers.

    @param features: A list of feature names.
    @param labels: A list of label names.
    @param hidden: A list of hidden-layer sizes, one per layer; the code expects numeric strings (e.g. ['64', '32']) since it also joins them into file names.
    @param learn_rate: The training rate, defaults to 0.1.
    @param beta: The regularization rate, defaults to 0.01.
    @param beta_out: The regularization rate for the output layer, defaults to 0.01.
    @param model_dir: A directory to store the model summaries in.

    @return: A 4-tuple of training, testing, plotting, and closing functions.
    '''

    #print 'building MLP with alpha=%f, beta=%f' % (learn_rate, beta)

    n_in = len(features)
    n_out = len(labels)

    x = tf.placeholder(tf.float64, [None, n_in], name='x')
    keep_prob = tf.placeholder(tf.float64, name='keep_prob')

    global_step = tf.Variable(0, name='global_step', trainable=False)

    h = x
    n = n_in
    Ws = []
    bs = []
    for i, n_hidden in enumerate(hidden):
        n_hidden = int(n_hidden)
        with tf.name_scope('hidden-' + str(i)):
            W = tf.Variable(
                tf.truncated_normal(
                    [n, n_hidden],
                    # we're using a ReLU, so initialize weights with
                    # a variance of 2/n (according to He et al 2015)
                    stddev=math.sqrt(2.0 / float(n)),
                    dtype=tf.float64),
                name='W_' + str(i))
            b = tf.Variable(tf.constant(0.0,
                                        shape=[n_hidden],
                                        dtype=tf.float64),
                            name='b_' + str(i))
            util.variable_summaries(W, 'W_' + str(i))
            util.variable_summaries(b, 'b_' + str(i))
            Ws.append(W)
            bs.append(b)
            # h = tf.nn.dropout(tf.nn.relu(tf.matmul(h, W) + b), keep_prob)
            h = tf.nn.relu(tf.matmul(h, W) + b)
        n = n_hidden

    with tf.name_scope('output'):
        W = tf.Variable(tf.truncated_normal([n, n_out],
                                            stddev=math.sqrt(1.0 / float(n)),
                                            dtype=tf.float64),
                        name='W')
        b = tf.Variable(tf.zeros([n_out], dtype=tf.float64), name='b')
        util.variable_summaries(W, 'W')
        util.variable_summaries(b, 'b')
        y = tf.matmul(h, W) + b

    y_ = tf.placeholder(tf.float64, [None, n_out], name='y_')

    saver = tf.train.Saver(Ws + bs)

    with tf.name_scope('cross_entropy'):
        cross_loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=y, labels=y_))
        tf.summary.scalar('cross_loss', cross_loss)
        regularizers = beta * sum(tf.nn.l2_loss(w)
                                  for w in Ws) + beta_out * tf.nn.l2_loss(W)
        tf.summary.scalar('l2_loss', regularizers)
        loss = cross_loss + regularizers
        tf.summary.scalar('loss', loss)
    with tf.name_scope('train'):
        train_step = tf.train.AdamOptimizer(learn_rate).minimize(
            loss, global_step=global_step)

    if n_out >= 2:
        correct_prediction = tf.equal(tf.argmax(tf.nn.softmax(y), 1),
                                      tf.argmax(y_, 1))
    else:
        correct_prediction = tf.equal(tf.sign(y), y_)
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float64))
    tf.summary.scalar('accuracy', accuracy)

    k = tf.placeholder(tf.int32, [], name='k')
    top_k = tf.nn.top_k(tf.transpose(tf.nn.softmax(y)), k)
    sess = tf.InteractiveSession()
    merged = tf.summary.merge_all()
    if model_dir:
        summary_writer = tf.summary.FileWriter(model_dir, sess.graph)

    ## NOTE: must be last!!
    tf.global_variables_initializer().run()

    # saver.restore(sess, 'hidden_model')

    def train(data, i, validation=None, verbose=False, batch_size=200):
        for _, batch in data.groupby(np.arange(len(data)) // batch_size,
                                     sort=False):
            # print(batch.shape)
            summary, _, step = sess.run(
                [merged, train_step, global_step],
                feed_dict={
                    x: batch[features],
                    y_: batch[labels],
                    # keep_prob: 0.5})
                    keep_prob: 1.0
                }  # TODO: should we use dropout??
            )
            if model_dir:
                summary_writer.add_summary(summary, step)
        ws1, bs1, w1, b1 = sess.run([Ws, bs, W, b])
        with open('models/hidden-' + '-'.join(hidden) + '.json', 'w') as f:
            d = {
                'hidden': [{
                    'weights': ws2.tolist(),
                    'biases': bs2.tolist()
                } for ws2, bs2 in zip(ws1, bs1)],
                'output': {
                    'weights': w1.tolist(),
                    'biases': b1.tolist()
                }
            }
            json.dump(d, f, indent=2)

    def test(data, store_predictions=False, loud=True):
        acc1 = 0
        acc2 = 0
        acc3 = 0
        ps = []
        rs = []
        fs = []
        cs = []
        ts = []
        times = []

        for f, d in data:
            start = time()
            (top_values, top_indices), truth, observed = sess.run(
                [top_k, tf.argmax(y_, 1),
                 tf.argmax(y, 1)],
                feed_dict={
                    x: d[features],
                    y_: d[labels],
                    k: min(3, len(d)),
                    keep_prob: 1.0
                })
            times.append(time() - start)
            if store_predictions:
                dir, f = os.path.split(f)
                f, _ = os.path.splitext(f)
                f = os.path.join(dir, 'hidden-' + '-'.join(hidden),
                                 f + '.ml.out')
                if not os.path.exists(os.path.dirname(f)):
                    os.makedirs(os.path.dirname(f))
                with open(f, 'w') as f:
                    for idx in top_indices[1]:
                        span = d.iloc[idx]['SourceSpan']
                        f.write(span)
                        f.write('\n')

            inc1 = 0
            inc2 = 0
            inc3 = 0
            correct = 0
            n_top = len(top_indices[1])
            if d['L-DidChange'][top_indices[1][0]] == 1:
                inc1 = 1
                inc2 = 1
                inc3 = 1
                correct += 1
            if n_top > 1 and d['L-DidChange'][top_indices[1][1]] == 1:
                inc2 = 1
                inc3 = 1
                correct += 1
            if n_top > 2 and d['L-DidChange'][top_indices[1][2]] == 1:
                inc3 = 1
                correct += 1
            acc1 += inc1
            acc2 += inc2
            acc3 += inc3

            # True positives.
            #tp = np.sum(np.logical_and(truth, observed))
            # False positives.
            #fp = np.sum(np.logical_and(np.logical_not(truth), observed))
            # False negatives.
            #fn = np.sum(np.logical_and(truth, np.logical_not(observed)))
            # True negatives.
            #tn = np.sum(np.logical_and(np.logical_not(truth), np.logical_not(observed)))
            # precision = np.float64(tp) / np.float64(tp + fp)
            # modified recall where top-3 predictions are the only "true" predictions
            c = len(d[(d['L-DidChange'] == 1) & (d['F-InSlice'] == 1)])
            recall = np.float64(correct) / np.float64(c)
            # fscore = np.float64(2.0) * precision * recall / (precision + recall)
            # if not np.isnan(precision):
            #     ps.append(precision)
            if not np.isnan(recall):
                rs.append(recall)
            cs.append(c)
            ts.append(len(d))
            # if not np.isnan(fscore):
            #     fs.append(fscore)
            #cs.append(tp+fn)
            #ts.append(tp+fp+fn+tn)
            #print('true changes: %d' % (tp+fn))
            #print('p/r/f1: %.3f / %.3f / %.3f' % (precision, recall, fscore))
            #print('')

        acc1 = float(acc1) / len(data)
        acc2 = float(acc2) / len(data)
        acc3 = float(acc3) / len(data)
        if loud:
            print('final accuracy: %.3f / %.3f / %.3f' % (acc1, acc2, acc3))
            print('avg/std recall: %.3f / %.3f' % (np.mean(rs), np.std(rs)))
            # print('avg p/r/f1: %.3f / %.3f / %.3f' % (np.mean(ps), np.mean(rs), np.mean(fs)))
            # print('std p/r/f1: %.3f / %.3f / %.3f' % (np.std(ps), np.std(rs), np.std(fs)))
            print('avg / std / med samples: %.2f / %.2f / %.2f' %
                  (np.mean(ts), np.std(ts), np.median(ts)))
            print('avg / std / med changes: %.2f / %.2f / %.2f' %
                  (np.mean(cs), np.std(cs), np.median(cs)))
            print('avg prediction time: %f' % np.mean(times))

        saver.save(sess, 'hidden-' + '-'.join(hidden))

        return {
            'top-1': acc1,
            'top-2': acc2,
            'top-3': acc3,
            'recall': np.mean(rs)
        }

    def plot():
        w = sess.run(tf.transpose(W))
        plt.matshow(w, cmap='hot', interpolation='nearest')
        plt.xticks(np.arange(len(features)), features, rotation=90)
        plt.yticks(np.arange(len(labels)), labels)
        # plt.legend()
        plt.show()

    def close():
        sess.close()
        tf.reset_default_graph()

    return train, test, plot, close
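A hypothetical driver for the returned closures; the feature/label name lists and the pandas DataFrames are assumptions:

train_fn, test_fn, plot_fn, close_fn = build_model(features, labels, hidden=['64', '32'], model_dir='/tmp/mlp')
for epoch in range(10):
    train_fn(train_df, epoch)
results = test_fn(file_groups)  # an iterable of (filename, DataFrame) pairs
close_fn()                      # releases the session and resets the default graph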
Example #12
    def make_summaries(self):
        for variable, name in zip(self.variables, self.variable_names):
            variable_summaries(variable, name)
        tf.summary.histogram("policy_head", self.policy_head)
        tf.summary.histogram("value_head", self.value_head)
Example #13
batch_size = 50

x_shape = [3]
# Initialize placeholders
x_data = tf.placeholder(shape=[None] + x_shape, dtype=tf.float32)
y_target = tf.placeholder(shape=[None, 1], dtype=tf.float32)

# Create variables for linear regression
A = tf.Variable(tf.random_normal(shape=x_shape + [1]), name='weights')
b = tf.Variable(tf.random_normal(shape=[1, 1]), name='bias')

# Declare model operations
model_output = tf.add(tf.matmul(x_data, A), b, name='activation')
variable_summaries(model_output)

# Declare the elastic net loss function
elastic_param1 = tf.constant(1.)
elastic_param2 = tf.constant(1.)
l1_a_loss = tf.reduce_mean(tf.abs(A))
l2_a_loss = tf.reduce_mean(tf.square(A))
e1_term = tf.multiply(elastic_param1, l1_a_loss, name='l1_reg')
e2_term = tf.multiply(elastic_param2, l2_a_loss, name='l2_reg')
ce_loss = tf.reduce_mean(tf.square(model_output - y_target), name='ce_loss')  # squared-error data term
total_loss = ce_loss + e1_term + e2_term

tf.summary.scalar('l1_loss', l1_a_loss)
tf.summary.scalar('l2_loss', l2_a_loss)
tf.summary.scalar('ce_loss', ce_loss)
tf.summary.scalar('total_loss', total_loss)
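A hypothetical optimizer and training loop for this graph, assuming numpy is imported as np and x_vals/y_vals are arrays of shapes (N, 3) and (N, 1):

my_opt = tf.train.GradientDescentOptimizer(0.001)
train_step = my_opt.minimize(total_loss)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for i in range(1000):
        idx = np.random.choice(len(x_vals), size=batch_size)
        sess.run(train_step, feed_dict={x_data: x_vals[idx], y_target: y_vals[idx]})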
Example #14
File: linear.py  Project: ucsd-progsys/nate
def build_model(features, labels, learn_rate=0.1, beta=0.01, model_dir=None):
    '''Build a linear classifier.

    @param features: A list of feature names.
    @param labels: A list of label names.
    @param learn_rate: The training rate, defaults to 0.1.
    @param beta: The regularization rate, defaults to 0.01.
    @param model_dir: A directory to store the model summaries in.

    @return: A 4-tuple of training, testing, plotting, and closing functions.
    '''

    n_in = len(features)
    n_out = len(labels)

    x = tf.placeholder(tf.float64, [None, n_in], name='x')

    global_step = tf.Variable(0, name='global_step', trainable=False)

    with tf.name_scope('linear'):
        W = tf.Variable(tf.truncated_normal([n_in, n_out],
                                            stddev=1.0 /
                                            math.sqrt(float(n_in)),
                                            dtype=tf.float64),
                        name='W')
        b = tf.Variable(tf.zeros([n_out], dtype=tf.float64), name='b')
        util.variable_summaries(W, 'W')
        util.variable_summaries(b, 'b')
        y = tf.matmul(x, W) + b

    y_ = tf.placeholder(tf.float64, [None, n_out], name='y_')

    with tf.name_scope('cross_entropy'):
        cross_loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=y, labels=y_))
        tf.summary.scalar('cross_loss', cross_loss)
        regularizers = beta * tf.nn.l2_loss(W)
        tf.summary.scalar('l2_loss', regularizers)
        loss = cross_loss + regularizers
        tf.summary.scalar('loss', loss)
    with tf.name_scope('train'):
        train_step = tf.train.AdamOptimizer(learn_rate).minimize(
            loss, global_step=global_step)

    sess = tf.InteractiveSession()
    merged = tf.summary.merge_all()
    if model_dir:
        summary_writer = tf.summary.FileWriter(model_dir, sess.graph)

    if n_out >= 2:
        correct_prediction = tf.equal(tf.argmax(tf.nn.softmax(y), 1),
                                      tf.argmax(y_, 1))
    else:
        correct_prediction = tf.equal(tf.sign(y), y_)
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float64))

    k = tf.placeholder(tf.int32, [], name='k')
    top_k = tf.nn.top_k(tf.transpose(tf.nn.softmax(y)), k)

    ## NOTE: must be last!!
    tf.global_variables_initializer().run()

    def train(data, i, validation=None, verbose=False, batch_size=200):
        for _, batch in data.groupby(np.arange(len(data)) // batch_size,
                                     sort=False):
            summary, _, step = sess.run([merged, train_step, global_step],
                                        feed_dict={
                                            x: batch[features],
                                            y_: batch[labels]
                                        })
            if model_dir:
                summary_writer.add_summary(summary, step)

    def test(data, store_predictions=False, loud=True):
        acc1 = 0
        acc2 = 0
        acc3 = 0
        ps = []
        rs = []
        fs = []
        cs = []
        ts = []
        times = []

        for f, d in data:
            start = time()
            (top_values, top_indices), truth, observed = sess.run(
                [top_k, tf.argmax(y_, 1),
                 tf.argmax(y, 1)],
                feed_dict={
                    x: d[features],
                    y_: d[labels],
                    k: min(3, len(d))
                })
            times.append(time() - start)
            if store_predictions:
                dir, f = os.path.split(f)
                f, _ = os.path.splitext(f)
                f = os.path.join(dir, 'linear', f + '.ml.out')
                if not os.path.exists(os.path.dirname(f)):
                    os.makedirs(os.path.dirname(f))
                with open(f, 'w') as f:
                    for idx in top_indices[1]:
                        span = d.iloc[idx]['SourceSpan']
                        f.write(span)
                        f.write('\n')

            inc1 = 0
            inc2 = 0
            inc3 = 0
            correct = 0
            n_top = len(top_indices[1])
            if d['L-DidChange'][top_indices[1][0]] == 1:
                inc1 = 1
                inc2 = 1
                inc3 = 1
                correct += 1
            if n_top > 1 and d['L-DidChange'][top_indices[1][1]] == 1:
                inc2 = 1
                inc3 = 1
                correct += 1
            if n_top > 2 and d['L-DidChange'][top_indices[1][2]] == 1:
                inc3 = 1
                correct += 1
            acc1 += inc1
            acc2 += inc2
            acc3 += inc3

            # True positives.
            #tp = np.sum(np.logical_and(truth, observed))
            # False positives.
            #fp = np.sum(np.logical_and(np.logical_not(truth), observed))
            # False negatives.
            #fn = np.sum(np.logical_and(truth, np.logical_not(observed)))
            # True negatives.
            #tn = np.sum(np.logical_and(np.logical_not(truth), np.logical_not(observed)))
            # precision = np.float64(tp) / np.float64(tp + fp)
            # modified recall where top-3 predictions are the only "true" predictions
            c = len(d[(d['L-DidChange'] == 1) & (d['F-InSlice'] == 1)])
            recall = np.float64(correct) / np.float64(c)
            # fscore = np.float64(2.0) * precision * recall / (precision + recall)
            # if not np.isnan(precision):
            #     ps.append(precision)
            if not np.isnan(recall):
                rs.append(recall)
            cs.append(c)
            ts.append(len(d))
            # if not np.isnan(fscore):
            #     fs.append(fscore)
            #cs.append(tp+fn)
            #ts.append(tp+fp+fn+tn)
            #print('true changes: %d' % (tp+fn))
            #print('p/r/f1: %.3f / %.3f / %.3f' % (precision, recall, fscore))
            #print('')

        acc1 = float(acc1) / len(data)
        acc2 = float(acc2) / len(data)
        acc3 = float(acc3) / len(data)
        if loud:
            print('final accuracy: %.3f / %.3f / %.3f' % (acc1, acc2, acc3))
            print('avg/std recall: %.3f / %.3f' % (np.mean(rs), np.std(rs)))
            print('avg / std / med samples: %.2f / %.2f / %.2f' %
                  (np.mean(ts), np.std(ts), np.median(ts)))
            print('avg / std / med changes: %.2f / %.2f / %.2f' %
                  (np.mean(cs), np.std(cs), np.median(cs)))

            print('avg prediction time: %f' % np.mean(times))
        return {
            'top-1': acc1,
            'top-2': acc2,
            'top-3': acc3,
            'recall': np.mean(rs)
        }

    def plot():
        w = sess.run(tf.transpose(W))
        plt.matshow(w, cmap='hot', interpolation='nearest')
        plt.xticks(np.arange(len(features)), features, rotation=90)
        plt.yticks(np.arange(len(labels)), labels)
        # plt.legend()
        plt.show()

    def close():
        sess.close()
        tf.reset_default_graph()

    return train, test, plot, close
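Because close() resets the default graph, several linear models can be built back to back, e.g. a hypothetical sweep over the regularization rate (the data variables are assumptions):

for beta in (0.001, 0.01, 0.1):
    train_fn, test_fn, _, close_fn = build_model(features, labels, beta=beta)
    for epoch in range(10):
        train_fn(train_df, epoch)
    print(beta, test_fn(file_groups, loud=False))
    close_fn()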