Example #1
 def run_eval(self, sess, data, summary_writer=None, step=0):
     # Evaluate one full pass over `data`; returns mean loss, accuracy, metrics, and p@k.
     loss_, metrics, p_k = 0.0, None, None
     accuracy, loss = 0.0, 0.0
     merged_summary = self.summarizer.merge_all()
     i = 0
     for X, Y, tot in self.data.next_batch(data):
         feed_dict = {self.x: X, self.y: Y, self.z: np.random.random((Y.shape[0], self.config.solver.randomvar_dim)), self.keep_prob: 1}
         if i == tot - 1 and summary_writer is not None:
             # Write the merged summary on the last batch only.
             if data == "validation":
                 summ, loss_ = sess.run([merged_summary, self.loss], feed_dict=feed_dict)
             else:
                 summ, loss_, accuracy_val = sess.run([merged_summary, self.loss, self.accuracy], feed_dict=feed_dict)
             summary_writer.add_summary(summ, step)
         else:
             if data == "validation":
                 loss_, Y_pred = sess.run([self.loss, tf.nn.sigmoid(self.y_pred)], feed_dict=feed_dict)
                 p_k = patk(predictions=Y_pred, labels=Y)
             else:
                 loss_, Y_pred, accuracy_val = sess.run([self.loss, tf.nn.sigmoid(self.y_pred), self.accuracy], feed_dict=feed_dict)
                 metrics = evaluate(predictions=Y_pred, labels=Y)
                 p_k = patk(predictions=Y_pred, labels=Y)
                 accuracy += accuracy_val
         loss += loss_
         i += 1
     return loss / i, accuracy / self.config.batch_size, metrics, p_k
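The patk helper used above is external to these snippets. A minimal precision-at-k sketch consistent with its call signature (patk(predictions=..., labels=...)), assuming row-wise multi-label score and indicator arrays and a hypothetical default of k=5, could look like:

import numpy as np

def patk(predictions, labels, k=5):
    # Mean precision@k: for each row, the fraction of the k highest-scored
    # labels that are true labels. k=5 is an assumed default.
    scores = np.asarray(predictions)
    truth = np.asarray(labels)
    top_k = np.argsort(-scores, axis=1)[:, :k]
    hits = np.take_along_axis(truth, top_k, axis=1)
    return hits.sum(axis=1).mean() / k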
Example #2
    def do_eval(self, sess, data):
        if data == "validation":
            err, accuracy = list(), list()
            X, Y = self.data.get_validation()
            for train, val in self.kf.split(X, y=Y):
                feed_dict = {self.x: X[val], self.y: Y[val], self.keep_prob: 1}
                loss_, Y_pred, accuracy_ = sess.run([self.loss, self.y_pred, self.accuracy], feed_dict=feed_dict)
                metrics = evaluate(predictions=Y_pred, labels=Y[val])  # overwritten each fold; the last fold's metrics are returned
                err.append(loss_)
                accuracy.append(accuracy_)
            return np.mean(err), np.mean(accuracy), metrics

        if data == "test":
            X, Y = self.data.get_test()
            feed_dict = {self.x: X, self.y: Y, self.keep_prob: 1}
            loss_, Y_pred, accuracy_ = sess.run([self.loss, self.y_pred, self.accuracy], feed_dict=feed_dict)
            metrics = evaluate(predictions=Y_pred, labels=Y)
            return loss_, accuracy_, metrics
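self.kf is not defined in the snippet; judging from self.kf.split(X, y=Y) above, it is presumably a scikit-learn splitter. A minimal setup consistent with that call (the fold count and seed are hypothetical) would be:

from sklearn.model_selection import KFold

# e.g. in the model's __init__; KFold.split accepts y but ignores it.
self.kf = KFold(n_splits=5, shuffle=True, random_state=42)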
Example #3
 def get_metrics(self, sess, data):
     accuracy, i = 0.0, 0
     for X, Y, tot in self.data.next_batch(data):
         feed_dict = {self.x: X, self.y: Y, self.keep_prob: 1.0}
         Y_pred, accuracy_val = sess.run([tf.nn.sigmoid(self.y_pred), self.accuracy], feed_dict=feed_dict)
         metrics = evaluate(predictions=Y_pred, labels=Y)  # metrics and p@k keep the last batch's values
         p_k = patk(predictions=Y_pred, labels=Y)
         accuracy += accuracy_val
         i += 1
     return metrics, accuracy / i, p_k
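evaluate is likewise external to these snippets. Based on the keys read elsewhere in the examples (metrics['micro_f1'], metrics['macro_f1'], metrics['accuracy']), a plausible stand-in that binarizes sigmoid scores at 0.5 (an assumption) is sketched below. The real helper evidently also returns a 'bae' key, used in Example #11, whose definition is not recoverable from these snippets.

import numpy as np
from sklearn.metrics import accuracy_score, f1_score

def evaluate(predictions, labels, threshold=0.5):
    # Assumed stand-in: threshold scores, then report multi-label metrics.
    y_pred = (np.asarray(predictions) >= threshold).astype(int)
    y_true = np.asarray(labels).astype(int)
    return {
        'micro_f1': f1_score(y_true, y_pred, average='micro', zero_division=0),
        'macro_f1': f1_score(y_true, y_pred, average='macro', zero_division=0),
        # accuracy_score is the exact-match ratio in the multi-label case.
        'accuracy': accuracy_score(y_true, y_pred),
    }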
Example #4
def tune_model(config, entity_embedding, Q, labels, train_ids, val_ids):
    pred_ids = val_ids
    labelled_ids = train_ids
    Y_hat = np.dot(Q, entity_embedding.T)
    Y_hat = Y_hat.T
    Y_hat = normalize(Y_hat, axis=1, norm='l2')
    performances = ep.evaluate(Y_hat[pred_ids, :],
                               labels[pred_ids, :],
                               threshold=0,
                               multi_label=config.MULTI_LABEL)
    return performances
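The dot-then-transpose in Example #4 is just E·Qᵀ; assuming Q has shape (n_labels, d) and entity_embedding has shape (n_entities, d), an equivalent one-liner is:

# (Q @ E.T).T == E @ Q.T, so the score matrix can be built directly
# (normalize is sklearn.preprocessing.normalize, as in the examples):
Y_hat = normalize(np.dot(entity_embedding, Q.T), axis=1, norm='l2')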
Example #5
def tune_model_using_svm(config, entity_embedding, labels, train_ids, val_ids):
    pred_ids = val_ids
    labelled_ids = train_ids
    clf = OneVsRestClassifier(LinearSVC(random_state=0))
    entity_embedding = normalize(entity_embedding, axis=1, norm='l2')
    clf.fit(entity_embedding[labelled_ids, :], labels[labelled_ids, :])
    predictions = clf.decision_function(entity_embedding[pred_ids, :])
    performances = ep.evaluate(predictions,
                               labels[pred_ids, :],
                               threshold=0,
                               multi_label=config.MULTI_LABEL)
    return performances
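LinearSVC exposes no predict_proba, so Example #5 scores with decision_function and evaluates at threshold=0, the SVM decision boundary. A hypothetical end-to-end call, with synthetic shapes and a stand-in config (and assuming the ep evaluation module used by these examples is importable):

import numpy as np
from types import SimpleNamespace

rng = np.random.RandomState(0)
entity_embedding = rng.randn(100, 16)          # 100 nodes, 16-dim embeddings
labels = (rng.rand(100, 5) > 0.7).astype(int)  # 5-way multi-label targets
train_ids, val_ids = np.arange(80), np.arange(80, 100)
config = SimpleNamespace(MULTI_LABEL=True)     # stand-in for the real config
performances = tune_model_using_svm(config, entity_embedding, labels, train_ids, val_ids)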
Example #6
def tune_model_using_lr(config, entity_embedding, labels, train_ids, val_ids):
    pred_ids = val_ids
    labelled_ids = train_ids
    clf = OneVsRestClassifier(LogisticRegression())
    entity_embedding = normalize(entity_embedding, axis=1, norm='l2')
    clf.fit(entity_embedding[labelled_ids, :], labels[labelled_ids, :])
    predictions = clf.predict_proba(entity_embedding[pred_ids, :])
    performances = ep.evaluate(predictions,
                               labels[pred_ids, :],
                               threshold=0,
                               multi_label=config.MULTI_LABEL)
    return performances
Example #7
    def predict_results(self, sess, data, preds=None):
        if preds is None:
            preds = self.dataset.label_cache

        labels_orig, labels_pred = [], []
        for node in np.where(self.dataset.get_nodes(data))[0]:
            labels_orig.append(self.dataset.all_labels[node])
            labels_pred.append(preds[node])

        return perf.evaluate(labels_pred, labels_orig)
Example #8
 def run_eval(self,
              sess,
              data,
              summary_writer=None,
              step=0,
              type_loss="NORMAL"):
     loss_, metrics, p_k, Y, Y_pred = 0.0, None, None, None, None
     accuracy, loss = 0.0, 0.0
     merged_summary = self.summarizer.merge_all()
     i = 0
     for X, Y, tot in self.data.next_batch(data):
         feed_dict = {self.x: X, self.y: Y, self.keep_prob: 1}
         if (type_loss == "AUTO"):
             summ, loss_ = sess.run([merged_summary, self.autoencoder_loss],
                                    feed_dict=feed_dict)
         else:
             if i == tot - 1 and summary_writer is not None:
                 if data == "validation":
                     summ, loss_ = sess.run([merged_summary, self.loss],
                                            feed_dict=feed_dict)
                 else:
                     summ, loss_, accuracy_val = sess.run(
                         [merged_summary, self.loss, self.accuracy],
                         feed_dict=feed_dict)
                 summary_writer.add_summary(summ, step)
             else:
                 if data == "validation":
                     loss_, Y_pred = sess.run(
                         [self.loss, tf.nn.sigmoid(self.y_pred)],
                         feed_dict=feed_dict)
                     p_k = patk(predictions=Y_pred, labels=Y)
                 else:
                     loss_, Y_pred, accuracy_val = sess.run(
                         [
                             self.loss,
                             tf.nn.sigmoid(self.y_pred), self.accuracy
                         ],
                         feed_dict=feed_dict)
                     metrics = evaluate(predictions=Y_pred, labels=Y)
                     accuracy += accuracy_val
         loss += loss_
         i += 1
     if data == "test":
         #X, Y = self.data.get_test()
         p_k = patk(
             predictions=Y_pred, labels=Y
         )  #sess.run(self.patk, feed_dict={self.x: X, self.y: Y, self.keep_prob: 1})
     return loss / i, accuracy / self.config.batch_size, metrics, p_k
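Examples #1, #3, and #8 all iterate over self.data.next_batch(data), which evidently yields (X, Y, tot) triples with tot the total number of batches (used above to detect the last batch). A minimal generator honoring that contract, sketched over in-memory NumPy arrays, might be:

import numpy as np

def next_batch(X, Y, batch_size):
    # Yield (X_batch, Y_batch, tot), where tot is the total batch count.
    tot = int(np.ceil(len(X) / batch_size))
    for b in range(tot):
        sl = slice(b * batch_size, (b + 1) * batch_size)
        yield X[sl], Y[sl], tot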
Example #9
    def run_eval(self, sess, data_type, epoch, summary_writer=None):
        merged_summary = self.summarizer.merge_all()
        if data_type == "validation":
            X, Y = self.data.get_validation()
        elif data_type == 'test':
            X, Y = self.data.get_test()
        feed_dict = {self.x: X, self.y: Y, self.keep_prob: 1}

        # summary_writer is None when evaluating on test data only
        if summary_writer is not None:
            summ, loss = sess.run([merged_summary, self.loss],
                                  feed_dict=feed_dict)
            summary_writer.add_summary(summ, epoch)

        loss, Y_pred = sess.run([self.loss, self.pred], feed_dict=feed_dict)
        metrics = evaluate(predictions=Y_pred, labels=Y)

        mean_loss = loss / X.shape[0]
        return mean_loss, metrics
Example #10
    def run_epoch(self,
                  sess,
                  data,
                  train_op=None,
                  summary_writer=None,
                  verbose=1,
                  learning_rate=0,
                  get_emb=False):
        train = train_op
        if train_op is None:
            train_op = tf.no_op()
            keep_prob_in = 1
            keep_prob_out = 1
        else:
            keep_prob_in = self.config.mRNN._keep_prob_in
            keep_prob_out = self.config.mRNN._keep_prob_out

        # Set up all variables
        # Number of nodes to run through
        total_steps = np.sum(self.dataset.get_nodes(data))
        verbose = min(verbose, total_steps)
        node_ids = np.zeros(total_steps, dtype=int)
        gradients, targets, attn_values, gating_values = [], [], [], []
        emb = np.zeros((total_steps, self.config.mRNN._hidden_size))
        losses, predictions, metrics, entropy = dict(), dict(), dict(), dict()

        metrics['node'], metrics['path'], metrics['combined'] = [], [], []
        for k in ('node', 'path', 'combined'):
            predictions[k] = np.zeros((total_steps, self.config.data_sets._len_labels))
        losses['node'], losses['path'], losses['combined'], losses['total'] = [], [], [], []

        ########################################################################################################
        feed_dict = {
            self.ph_keep_prob_in: keep_prob_in,
            self.ph_keep_prob_out: keep_prob_out,
            self.ph_wce: self.dataset.wce,
            self.ph_lr: learning_rate
        }

        # Reset grad accumulator at the beginning
        sess.run([self.reset_grads], feed_dict=feed_dict)

        # Start the input-loading thread and queue runners
        t = threading.Thread(target=self.load_and_enqueue, args=[sess, data])
        t.daemon = True
        t.start()
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)

        #Code profiling
        # options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
        # run_metadata = tf.RunMetadata()

        step = 0
        while step < total_steps:
            feed_dict = {
                self.ph_keep_prob_in: keep_prob_in,
                self.ph_keep_prob_out: keep_prob_out,
                self.ph_wce: self.dataset.wce,
                self.ph_lr: learning_rate
            }

            if (step < total_steps - 1) or not self.config.summaries or summary_writer is None:
                id, grads, t_losses, t_pred_probs, target_label, t_attn_values, t_gating_values, t_entropy, t_emb = \
                    sess.run([self.node_id, train_op, self.losses, self.predictions, self.y_labels,
                              self.arch.attn_values, self.arch.gating_values, self.entropy, self.emb],
                             feed_dict=feed_dict)  # , options=options, run_metadata=run_metadata

            else:
                summary, id, grads, t_losses, t_pred_probs, target_label, t_attn_values, t_gating_values, t_entropy, t_emb = \
                    sess.run([self.summary, self.node_id, train_op, self.losses, self.predictions, self.y_labels,
                              self.arch.attn_values, self.arch.gating_values, self.entropy, self.emb],
                             feed_dict=feed_dict)  # , options=options, run_metadata=run_metadata
                summary_writer.add_summary(summary, self.arch.global_step.eval(session=sess))
                summary_writer.flush()

            #Saving code profile
            # fetched_timeline = timeline.Timeline(run_metadata.step_stats)
            # chrome_trace = fetched_timeline.generate_chrome_trace_format()
            # with open('timeline_02_step_%d.json' % step, 'w') as f:
            #     f.write(chrome_trace)

            node_ids[step] = id

            # Accumulate attention values
            attn_values.append(np.std(t_attn_values[t_attn_values.nonzero()]))
            gating_values.append(
                np.abs(t_gating_values[0] - t_gating_values[1]))

            # Accumulate losses
            losses['node'].append(t_losses[0])
            losses['path'].append(t_losses[1])
            losses['combined'].append(t_losses[2])
            losses['total'].append(t_losses[3])

            #Accumulate entropy of prediction
            entropy[id] = t_entropy

            # Accumulate embeddings
            if get_emb:
                emb[step] = t_emb[0]

            # Accumulate Predictions
            for k, v in t_pred_probs.items():
                predictions[k][step] = v
            targets.append(np.squeeze(target_label))

            step += 1

            if train and (step % self.config.batch_size == 0
                          or step == total_steps):
                # Update gradients after batch_size or at the end of the current epoch
                #print("Queue size: ", sess.run([self.Q.size()]))

                batch_size = self.config.batch_size
                if step == total_steps:
                    batch_size = step % batch_size
                feed_dict[self.ph_batch_size] = batch_size

                sess.run([self.update_op], feed_dict=feed_dict)
                sess.run([self.reset_grads], feed_dict=feed_dict)

                if verbose and self.config.solver.gradients:
                    # get the absolute maximum gradient to each variable
                    gradients.append([np.max(np.abs(item)) for item in grads])
                    print("%d/%d :: " % (step, total_steps), end="")
                    for var, val in zip([
                            '-'.join(k.name.split('/')[-2:])
                            for k in tf.trainable_variables()
                    ], np.mean(gradients, axis=0)):
                        print("%s :: %.8f  " %
                              (var, val / self.config.batch_size),
                              end="")
                    print("\n")
                sys.stdout.flush()

        coord.request_stop()
        coord.join(threads)

        # Average statistics over batches
        for k in losses.keys():
            losses[k] = np.mean(losses[k])
        for k in metrics.keys():
            _, metrics[k] = perf.evaluate(
                np.asarray(predictions[k]),
                np.asarray(targets),
                multi_label=self.config.data_sets._multi_label)

        # Hack: store attention and gating aggregates in the metrics dict
        metrics['combined']['pak'] = np.mean(attn_values)
        metrics['combined']['average_precision'] = np.mean(gating_values)

        return (node_ids, predictions, losses, metrics, np.asarray(attn_values),
                np.asarray(gating_values), np.mean(list(entropy.values())), emb)
Example #11
    def run_epoch(self,
                  sess,
                  data,
                  train_op=None,
                  summary_writer=None,
                  verbose=1):
        # Optimize the objective for one entire epoch via mini-batches

        if not train_op:
            train_op = tf.no_op()
            keep_prob_in = 1
            keep_prob_out = 1
        else:
            keep_prob_in = self.config.mRNN._keep_prob_in
            keep_prob_out = self.config.mRNN._keep_prob_out

        total_loss, label_loss = [], []
        f1_micro, f1_macro, accuracy, bae = [], [], [], []
        for step, (input_batch, input_batch2, seq, label_batch, tot, lengths,
                   mask) in enumerate(
                       self.dataset.next_batch(data,
                                               self.config.batch_size,
                                               shuffle=True)):
            # print("\n\n\nActualLabelCount: ", np.shape(input_batch), np.shape(input_batch2), np.shape(label_batch), np.shape(seq))
            feed_dict = self.create_feed_dict(input_batch, input_batch2,
                                              label_batch)
            feed_dict[self.keep_prob_in] = keep_prob_in
            feed_dict[self.keep_prob_out] = keep_prob_out
            feed_dict[self.wce_placeholder] = self.dataset.wce
            feed_dict[self.mask] = mask
            feed_dict[self.inp_lengths] = lengths

            # Write the loss summary at the last step of the epoch
            if (step + 1) < tot:
                _, loss_value, pred_labels = sess.run(
                    [train_op, self.loss, self.arch.label_preds],
                    feed_dict=feed_dict)
            else:
                _, loss_value, summary, pred_labels = sess.run(
                    [train_op, self.loss, self.summary, self.arch.label_preds],
                    feed_dict=feed_dict)
                if summary_writer is not None:
                    summary_writer.add_summary(
                        summary, self.arch.global_step.eval(session=sess))
                    summary_writer.flush()

            total_loss.append(loss_value[0])
            label_loss.append(loss_value[1])

            if verbose and step % verbose == 0:
                metrics = [0] * 10
                if self.config.solver._curr_label_loss:
                    metrics = perf.evaluate(pred_labels, label_batch)
                    f1_micro.append(metrics['micro_f1'])
                    f1_macro.append(metrics['macro_f1'])
                    accuracy.append(metrics['accuracy'])
                    bae.append(metrics['bae'])
                print(
                    '%d/%d : label = %0.4f : micro-F1 = %0.3f : accuracy = %0.3f : bae = %0.3f'
                    % (step, tot, np.mean(label_loss), np.mean(f1_micro),
                       np.mean(accuracy), np.mean(bae)),
                    end="\r")
                sys.stdout.flush()

        if verbose:
            sys.stdout.write('\r')
        return (np.mean(total_loss), np.mean(f1_micro), np.mean(f1_macro),
                np.mean(accuracy), np.mean(bae))