Example #1
0
    def run_epoch(self, epoch_name: str, epoch_num, data, is_training: bool):
        loss = 0
        edge_loss, kl_loss, node_symbol_loss = 0, 0, 0
        start_time = time.time()
        processed_graphs = 0
        batch_iterator = ThreadedIterator(self.make_minibatch_iterator(
            data, is_training),
                                          max_queue_size=5)

        for step, batch_data in enumerate(batch_iterator):
            num_graphs = batch_data[self.placeholders['num_graphs']]
            processed_graphs += num_graphs
            batch_data[self.placeholders['is_generative']] = False
            # Randomly sample from normal distribution
            batch_data[self.placeholders['z_prior']] = utils.generate_std_normal(
                self.params['batch_size'],
                batch_data[self.placeholders['num_vertices']],
                self.params['encoding_size'])
            batch_data[self.placeholders['z_prior_in']] = utils.generate_std_normal(
                self.params['batch_size'],
                batch_data[self.placeholders['num_vertices']],
                self.params['hidden_size'])

            if is_training:
                batch_data[self.placeholders[
                    'out_layer_dropout_keep_prob']] = self.params[
                        'out_layer_dropout_keep_prob']
                fetch_list = [
                    self.ops['loss'], self.ops['mean_edge_loss_in'],
                    self.ops['mean_kl_loss_in'],
                    self.ops['mean_node_symbol_loss_in'],
                    self.ops['train_step']
                ]
            else:
                batch_data[
                    self.placeholders['out_layer_dropout_keep_prob']] = 1.0
                fetch_list = [
                    self.ops['loss'], self.ops['mean_edge_loss_in'],
                    self.ops['mean_kl_loss_in'],
                    self.ops['mean_node_symbol_loss_in']
                ]

            result = self.sess.run(fetch_list, feed_dict=batch_data)

            batch_loss = result[0]
            loss += batch_loss * num_graphs

            edge_loss += result[1] * num_graphs
            kl_loss += result[2] * num_graphs
            node_symbol_loss += result[3] * num_graphs

            print(
                "Running %s, batch %i (has %i graphs). Loss so far: %.4f. Edge loss: %.4f, KL loss: %.4f, Node symbol loss: %.4f"
                % (epoch_name, step, num_graphs, loss / processed_graphs,
                   edge_loss / processed_graphs, kl_loss / processed_graphs,
                   node_symbol_loss / processed_graphs),
                end='\r')

        loss = loss / processed_graphs
        edge_loss = edge_loss / processed_graphs
        kl_loss = kl_loss / processed_graphs
        node_symbol_loss = node_symbol_loss / processed_graphs
        instance_per_sec = processed_graphs / (time.time() - start_time)
        return (loss, edge_loss, kl_loss, node_symbol_loss), instance_per_sec
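
Both z_prior feeds above come from utils.generate_std_normal, whose definition is not included in these examples. A minimal sketch of such a helper, assuming it does nothing more than draw i.i.d. standard-normal samples shaped (batch_size, num_vertices, size):

import numpy as np

def generate_std_normal(batch_size, num_vertices, size):
    # One standard-normal vector per node per graph in the batch
    # (a sketch; the real utils.generate_std_normal may differ).
    return np.random.normal(loc=0.0, scale=1.0,
                            size=(batch_size, num_vertices, size))
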
    def pred(self, testfile):
        # Output the planner predicted least likely to time out, and report
        # the timeout rate obtained by always choosing that planner.
        if os.path.exists(self.best_model_file):
            self.restore_model(self.best_model_file)
        testdata = self.load_data(testfile, is_training_data=False)
        processed_graphs = 0
        accuracy_ops = [self.ops['accuracy_task%i' % task_id] for task_id in self.params['task_ids']]
        batch_iterator = ThreadedIterator(self.make_minibatch_iterator(testdata, False), max_queue_size=5)
        preds = []
        accs = []
        for step, batch_data in enumerate(batch_iterator):
            num_graphs = batch_data[self.placeholders['num_graphs']]
            processed_graphs += num_graphs

            batch_data[self.placeholders['out_layer_dropout_keep_prob']] = 1.0
            fetch_list = [self.ops['predicted_values'], accuracy_ops]

            result = self.sess.run(fetch_list, feed_dict=batch_data)
            (batch_pred, batch_accuracies) = (result[0], result[1])
            batch_pred = np.array(batch_pred)
            if len(batch_pred.shape) == 1:
                batch_pred = np.expand_dims(batch_pred, 1)

            preds.append(batch_pred.T)
            accs.append(np.array(batch_accuracies) * num_graphs)
        planner = np.argmin(np.concatenate(preds, 0), axis=1)
        pred_labels = [testdata[ex_id]["labels"][planner[ex_id]] for ex_id in range(len(testdata))]
        return np.concatenate(preds, 0), sum(pred_labels), len(testdata)
Example #3
0
    def generate_vector(self):
        with self.graph.as_default():
            if self.args.get('--restore') is not None:
                self.myalldata_dir = '/data/bwj/test/mangrove/ml/ggnn/data/singles/rw/'
                self.data_dir = self.myalldata_dir
                for _, _, files in os.walk(self.myalldata_dir):
                    for singlefile in files:
                        single_data = self.load_data(singlefile,
                                                     is_training_data=False)
                        # Iterate over this file's own data, not self.valid_data.
                        batch_iterator = ThreadedIterator(
                            self.make_minibatch_iterator(single_data, False),
                            max_queue_size=5)
                        for batch_data in batch_iterator:
                            batch_data[self.placeholders[
                                'out_layer_dropout_keep_prob']] = 1.0
                            final_node = self.sess.run(
                                self.compute_final_node_representations(),
                                feed_dict=batch_data)
                            final_node = np.mean(final_node, 0)
                            np.save(
                                './outputs/ggnn_vectors/{}_final_node.npy'.format(
                                    singlefile.replace('.json', '')),
                                final_node)
                        #_, single_accs, _, _, _, _ = self.run_epoch("Generate {}'s vector".format(singlefile), single_data, False, True)
                        #best_val_acc = np.sum(valid_accs)
                        #print("%s Pred: %.2f" % (singlefile, best_val_acc))
                        print("{} ok".format(singlefile))
Example #4
0
    def run_epoch(self, epoch_name: str, data, is_training: bool):

        loss = 0
        accuracies = []
        accuracy_ops = [self.ops['accuracy_task%i' % task_id] for task_id in self.params['task_ids']]
        start_time = time.time()
        processed_graphs = 0
        batch_iterator = ThreadedIterator(self.make_minibatch_iterator(data, is_training), max_queue_size=5)
        for step, batch_data in enumerate(batch_iterator):
            num_graphs = batch_data[self.placeholders['num_graphs']]
            processed_graphs += num_graphs
            if is_training:
                batch_data[self.placeholders['out_layer_dropout_keep_prob']] = self.params['out_layer_dropout_keep_prob']
                fetch_list = [self.ops['loss'], accuracy_ops, self.ops['train_step']]
            else:
                batch_data[self.placeholders['out_layer_dropout_keep_prob']] = 1.0
                fetch_list = [self.ops['loss'], accuracy_ops]
            result = self.sess.run(fetch_list, feed_dict=batch_data)
            (batch_loss, batch_accuracies) = (result[0], result[1])
            loss += batch_loss
            accuracies.append(np.array(batch_accuracies) * num_graphs)

            print("Running %s, batch %i (has %i graphs). Loss so far: %.4f" % (epoch_name,
                                                                               step,
                                                                               num_graphs,
                                                                               loss / processed_graphs),
                  end='\r')

        accuracies = np.sum(accuracies, axis=0) / processed_graphs
        loss = loss / processed_graphs
        # error_ratios = accuracies / chemical_accuracies[self.params["task_ids"]]
        error_ratios = accuracies
        instance_per_sec = processed_graphs / (time.time() - start_time)
        return loss, accuracies, error_ratios, instance_per_sec
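
Every epoch loop in these examples pulls minibatches through ThreadedIterator with a bounded queue. A minimal sketch of such a wrapper, assuming its only job is to pre-fetch elements from a possibly slow minibatch generator on a background thread (the real class may handle errors and shutdown differently):

import queue
import threading

class ThreadedIterator:
    def __init__(self, original_iterator, max_queue_size=2):
        self._queue = queue.Queue(maxsize=max_queue_size)
        self._thread = threading.Thread(
            target=self._worker, args=(original_iterator,), daemon=True)
        self._thread.start()

    def _worker(self, original_iterator):
        for element in original_iterator:
            self._queue.put(element, block=True)
        self._queue.put(None, block=True)  # sentinel: iteration finished

    def __iter__(self):
        next_element = self._queue.get(block=True)
        while next_element is not None:
            yield next_element
            next_element = self._queue.get(block=True)
        self._thread.join()

The bounded queue (max_queue_size=5 in the examples) lets batch construction overlap with sess.run while capping memory use.
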
Example #5
0
    def pred(self, testfile, contextfile):
        if os.path.exists(self.best_model_file):
            self.restore_model(self.best_model_file)
        testdata, skipped_graphs = self.load_data(testfile, is_training_data=False, context_file=contextfile)
        processed_graphs = 0
        batch_iterator = ThreadedIterator(self.make_minibatch_iterator(testdata, False), max_queue_size=5)
        preds = []

        for step, batch_data in enumerate(batch_iterator):
            num_graphs = batch_data[self.placeholders['num_graphs']]
            processed_graphs += num_graphs

            batch_data[self.placeholders['out_layer_dropout_keep_prob']] = 1.0
            fetch_list = [self.ops['predicted_values']]

            result = self.sess.run(fetch_list, feed_dict=batch_data)
            batch_pred = result[0]
            batch_pred = np.array(batch_pred)
            if len(batch_pred.shape) == 1:
                batch_pred = np.expand_dims(batch_pred, 1)

            preds.append(batch_pred.T)

        preds = np.concatenate(preds, 0)
        pred_id = np.argmin(preds, axis=1)
        pred_labels = [testdata[ex_id]["labels"][pred_id[ex_id]] for ex_id in range(len(testdata))]
        return sum(pred_labels), len(testdata), preds, skipped_graphs
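
The tail of pred reduces a (num_graphs, num_planners) prediction matrix to one chosen planner per graph, then counts the resulting timeouts. A tiny worked example with made-up numbers (values are illustrative only; as in Example #1's pred, a label of 1 marks a timeout):

import numpy as np

# Two test graphs, three candidate planners; the planner with the lowest
# predicted timeout probability wins.
preds = np.array([[0.9, 0.2, 0.5],
                  [0.1, 0.4, 0.3]])
pred_id = np.argmin(preds, axis=1)            # array([1, 0])

# Per-graph ground truth: labels[p] == 1 iff planner p actually timed out.
testdata = [{"labels": [1, 0, 1]}, {"labels": [0, 1, 1]}]
pred_labels = [testdata[i]["labels"][pred_id[i]] for i in range(len(testdata))]
print(sum(pred_labels), len(testdata))        # 0 timeouts over 2 graphs
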
Example #6
0
    def run_epoch(self, epoch_name: str, data):
        loss = 0
        start_time = time.time()
        processed_graphs = 0
        batch_iterator = ThreadedIterator(self.make_minibatch_iterator(
            data, True),
                                          max_queue_size=5)
        for step, batch_data in enumerate(batch_iterator):
            num_graphs = batch_data[self.placeholders['num_graphs']]
            processed_graphs += num_graphs
            fetch_list = [
                self.ops['loss'], self.ops['train_step'],
                self.placeholders['state_Dec']
            ]

            result = self.sess.run(fetch_list, feed_dict=batch_data)
            batch_loss = result[0]
            loss += batch_loss * num_graphs
            print("Running epoch: " + str(epoch_name) + ", batch " +
                  str(step) + ", num_graphs: " + str(num_graphs) +
                  ", loss so far: " + str(loss / processed_graphs))

        loss = loss / processed_graphs
        instance_per_sec = processed_graphs / (time.time() - start_time)
        return loss, instance_per_sec
    def run_epoch(self,
                  epoch_name: str,
                  data,
                  is_training: bool,
                  start_step: int = 0):
        chemical_accuracies = np.array([
            0.066513725, 0.012235489, 0.071939046, 0.033730778, 0.033486113,
            0.004278493, 0.001330901, 0.004165489, 0.004128926, 0.00409976,
            0.004527465, 0.012292586, 0.037467458
        ])

        loss = 0
        accuracies = []
        accuracy_ops = [
            self.ops['accuracy_task%i' % task_id]
            for task_id in self.params['task_ids']
        ]
        start_time = time.time()
        processed_graphs = 0
        steps = 0
        batch_iterator = ThreadedIterator(self.make_minibatch_iterator(
            data, is_training),
                                          max_queue_size=5)
        for step, batch_data in enumerate(batch_iterator):
            num_graphs = batch_data[self.placeholders['num_graphs']]
            processed_graphs += num_graphs
            if is_training:
                batch_data[self.placeholders[
                    'out_layer_dropout_keep_prob']] = self.params[
                        'out_layer_dropout_keep_prob']
                fetch_list = [
                    self.ops['loss'], accuracy_ops, self.ops['summary'],
                    self.ops['train_step']
                ]
            else:
                batch_data[
                    self.placeholders['out_layer_dropout_keep_prob']] = 1.0
                fetch_list = [
                    self.ops['loss'], accuracy_ops, self.ops['summary']
                ]
            result = self.sess.run(fetch_list, feed_dict=batch_data)
            (batch_loss, batch_accuracies,
             batch_summary) = (result[0], result[1], result[2])
            writer = self.train_writer if is_training else self.valid_writer
            writer.add_summary(batch_summary, start_step + step)
            loss += batch_loss * num_graphs
            accuracies.append(np.array(batch_accuracies) * num_graphs)

            print("Running %s, batch %i (has %i graphs). Loss so far: %.4f" %
                  (epoch_name, step, num_graphs, loss / processed_graphs),
                  end='\r')
            steps += 1

        accuracies = np.sum(accuracies, axis=0) / processed_graphs
        loss = loss / processed_graphs
        error_ratios = accuracies / chemical_accuracies[
            self.params["task_ids"]]
        instance_per_sec = processed_graphs / (time.time() - start_time)
        return loss, accuracies, error_ratios, instance_per_sec, steps
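
The error_ratios line above normalizes per-task accuracy by a per-task constant, selected with NumPy fancy indexing. A small illustration using the first three constants from chemical_accuracies and made-up accuracies:

import numpy as np

chemical_accuracies = np.array([0.066513725, 0.012235489, 0.071939046])
task_ids = [0, 2]                       # tasks actually being trained
accuracies = np.array([0.050, 0.040])   # illustrative per-task values

# chemical_accuracies[task_ids] -> array([0.066513725, 0.071939046])
error_ratios = accuracies / chemical_accuracies[task_ids]
print(error_ratios)                     # approx. [0.7517, 0.5560]
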
Example #8
0
    def run_epoch(self, epoch_name: str, epoch_num, data, is_training: bool):
        loss = 0
        start_time = time.time()
        processed_graphs = 0
        batch_iterator = ThreadedIterator(self.make_minibatch_iterator(
            data, is_training),
                                          max_queue_size=5)

        for step, batch_data in enumerate(batch_iterator):
            num_graphs = batch_data[self.placeholders['num_graphs']]
            processed_graphs += num_graphs
            batch_data[self.placeholders['is_generative']] = False
            # Randomly sample from normal distribution
            batch_data[self.placeholders['z_prior']] = utils.generate_std_normal(
                self.params['batch_size'],
                batch_data[self.placeholders['num_vertices']],
                self.params['hidden_size'])
            if is_training:
                batch_data[self.placeholders[
                    'out_layer_dropout_keep_prob']] = self.params[
                        'out_layer_dropout_keep_prob']
                fetch_list = [
                    self.ops['loss'], self.ops['train_step'],
                    self.ops["edge_loss"], self.ops['kl_loss'],
                    self.ops['node_symbol_prob'],
                    self.placeholders['node_symbols'],
                    self.ops['qed_computed_values'],
                    self.placeholders['target_values'],
                    self.ops['total_qed_loss'], self.ops['mean'],
                    self.ops['logvariance'], self.ops['grads'],
                    self.ops['mean_edge_loss'],
                    self.ops['mean_node_symbol_loss'],
                    self.ops['mean_kl_loss'], self.ops['mean_total_qed_loss']
                ]
            else:
                batch_data[
                    self.placeholders['out_layer_dropout_keep_prob']] = 1.0
                fetch_list = [
                    self.ops['mean_edge_loss'], self.ops['accuracy_task0']
                ]
            result = self.sess.run(fetch_list, feed_dict=batch_data)
            """try:
                if is_training:
                    self.save_intermediate_results(batch_data[self.placeholders['adjacency_matrix']], 
                        result[11], result[12], result[4], result[5], result[9], result[10], result[6], result[7], result[13], result[14])
            except IndexError:
                pass"""

            batch_loss = result[0]
            loss += batch_loss * num_graphs

            print("Running %s, batch %i (has %i graphs). Loss so far: %.4f" %
                  (epoch_name, step, num_graphs, loss / processed_graphs),
                  end='\r')
        loss = loss / processed_graphs
        instance_per_sec = processed_graphs / (time.time() - start_time)
        return loss, instance_per_sec
Example #9
0
    def findRep(self, data, name, training):
        batch_iterator = ThreadedIterator(self.make_minibatch_iterator(
            data, False),
                                          max_queue_size=1)
        grReps_enc = []
        grReps_dec = []
        grReps_static = []
        step_enc = 0
        step_dec = 0
        step_static = 0

        start_att = self.params["win"]
        step_att = self.params["win"]

        start_idx_enc = step_enc
        start_idx_dec = step_dec
        start_idx_static = step_static

        lastSeenGr_enc = -1
        lastSeenGr_dec = -1
        lastSeenGr_static = -1
        fw = open("att/" + self.run_id + "." + name + ".dec.txt", "w")

        for batch_ind, batch_data in enumerate(batch_iterator):
            enc_output, static_grRep, _, state_dec, batch_att = self.sess.run(
                (self.placeholders['enc_output'],
                 self.placeholders['static_gr_rep'],
                 self.placeholders['dec_output'],
                 self.placeholders['state_Dec'], self.placeholders['att']),
                feed_dict=batch_data)
            grReps_enc, start_idx_enc, step_enc, lastSeenGr_enc = self.finalRep(
                enc_output, grReps_enc, start_idx_enc, step_enc,
                lastSeenGr_enc)
            grReps_dec, start_idx_dec, step_dec, lastSeenGr_dec = self.finalRep(
                state_dec, grReps_dec, start_idx_dec, step_dec, lastSeenGr_dec)
            grReps_static, start_idx_static, step_static, lastSeenGr_static = self.finalRep(
                static_grRep, grReps_static, start_idx_static, step_static,
                lastSeenGr_static)
            self.writeAttBatch(fw, batch_att, start_att, step_att)
            start_att = start_idx_dec + self.params["win"]
            step_att = step_dec + self.params["win"]

        fw.flush()
        fw.close()
        self.writeGrRepSVMformat(grReps_enc, data, name, "enc", training)
        self.writeGrRepSVMformat(grReps_dec, data, name, "dec", training)
        self.writeGrRepSVMformat(grReps_static, data, name, "static", training)

        print(len(data))
    def run_epoch(self,
                  epoch_name: str,
                  data,
                  is_training: bool,
                  start_step: int = 0):
        chemical_accuracies = np.array([
            0.066513725, 0.012235489, 0.071939046, 0.033730778, 0.033486113,
            0.004278493, 0.001330901, 0.004165489, 0.004128926, 0.00409976,
            0.004527465, 0.012292586, 0.037467458
        ])

        loss = 0
        accuracies = []
        accuracy_ops = [
            self.ops['accuracy_task%i' % task_id]
            for task_id in self.params['task_ids']
        ]
        start_time = time.time()
        processed_data = 0
        steps = 0
        batch_iterator = ThreadedIterator(self.make_minibatch_iterator(
            data, is_training),
                                          max_queue_size=5)

        # Use the model's own session; a fresh tf.Session here would see
        # uninitialized variables.
        for my_step, my_batch_data in enumerate(batch_iterator):
            processed_data += self.params['batch_size']
            # Other quantities of interest (e.g. accuracies) can be added to
            # fetch_list alongside the loss. 'logits' is assumed here to be a
            # registered output-layer op; the original snippet referenced an
            # undefined `logits` name.
            fetch_list = [self.ops['loss'], self.ops['logits']]
            result = self.sess.run(fetch_list, feed_dict=my_batch_data)
            # result holds the fetched values, in fetch_list order
            (batch_loss, batch_logits) = (result[0], result[1])
            loss += batch_loss * self.params['batch_size']
            print(
                "Running %s, batch %i (has %i graphs). Loss so far: %.4f" %
                (epoch_name, my_step, self.params['batch_size'],
                 loss / processed_data),
                end='\r')
            steps += 1
        loss = loss / processed_data
        instance_per_sec = processed_data / (time.time() - start_time)
        return loss, instance_per_sec, steps
Example #11
0
    def findRep(self, data, name, training):
        batch_iterator = ThreadedIterator(self.make_minibatch_iterator(
            data, False),
                                          max_queue_size=5)
        grReps_enc = []
        grReps_dec = []
        grReps_static = []
        step_enc = 0
        step_dec = 0
        step_static = 0

        start_idx_enc = step_enc
        start_idx_dec = step_dec
        start_idx_static = step_static

        lastSeenGr_enc = -1
        lastSeenGr_dec = -1
        lastSeenGr_static = -1

        for batch_ind, batch_data in enumerate(batch_iterator):
            enc_output, static_grRep, _, state_dec = self.sess.run(
                (self.placeholders['enc_output'],
                 self.placeholders['static_gr_rep'],
                 self.placeholders['dec_output'],
                 self.placeholders['state_Dec']),
                feed_dict=batch_data)
            grReps_enc, start_idx_enc, step_enc, lastSeenGr_enc = self.finalRep(
                enc_output, grReps_enc, start_idx_enc, step_enc,
                lastSeenGr_enc)
            grReps_dec, start_idx_dec, step_dec, lastSeenGr_dec = self.finalRep(
                state_dec, grReps_dec, start_idx_dec, step_dec, lastSeenGr_dec)
            grReps_static, start_idx_static, step_static, lastSeenGr_static = self.finalRep(
                static_grRep, grReps_static, start_idx_static, step_static,
                lastSeenGr_static)

        self.writeGrRepSVMformat(grReps_enc, data, name, "enc", training)
        self.writeGrRepSVMformat(grReps_dec, data, name, "dec", training)
        self.writeGrRepSVMformat(grReps_static, data, name, "static", training)

        print(len(data))
Example #12
0
    def run_epoch(self, epoch_name: str, data, is_training: bool):
        loss = 0
        accuracies = []
        precision = []
        recall = []
        f1 = []
        accuracy_ops = [self.ops['accuracy_task%i' % task_id] for task_id in self.params['task_ids']]
        precision_ops = [self.ops['precision_task%i' % task_id] for task_id in self.params['task_ids']]
        recall_ops = [self.ops['recall_task%i' % task_id] for task_id in self.params['task_ids']]
        f1_ops = [self.ops['f1_task%i' % task_id] for task_id in self.params['task_ids']]
        
        start_time = time.time()
        processed_graphs = 0
        batch_iterator = ThreadedIterator(self.make_minibatch_iterator(data, is_training), max_queue_size=5)
        
        for step, batch_data in enumerate(batch_iterator):
            num_graphs = batch_data[self.placeholders['num_graphs']]
            processed_graphs += num_graphs
            if is_training:
                batch_data[self.placeholders['out_layer_dropout_keep_prob']] = self.params['out_layer_dropout_keep_prob']
                fetch_list = [self.ops['loss'], accuracy_ops, precision_ops, recall_ops, f1_ops, self.ops['train_step']]
            else:
                batch_data[self.placeholders['out_layer_dropout_keep_prob']] = 1.0
                fetch_list = [self.ops['loss'], accuracy_ops, precision_ops, recall_ops, f1_ops]
            result = self.sess.run(fetch_list, feed_dict=batch_data)
            (batch_loss, batch_accuracies, batch_precision, batch_recall, batch_f1) = (result[0], result[1], result[2], result[3], result[4])
            loss += batch_loss * num_graphs
            accuracies.append(np.array(batch_accuracies) * num_graphs)
            precision.append(np.array(batch_precision) * num_graphs)
            recall.append(np.array(batch_recall) * num_graphs)
            f1.append(np.array(batch_f1) * num_graphs)

        accuracies = np.sum(accuracies, axis=0) / processed_graphs
        precision = np.sum(precision, axis=0) / processed_graphs
        recall = np.sum(recall, axis=0) / processed_graphs
        f1 = np.sum(f1, axis=0) / processed_graphs
        loss = loss / processed_graphs
        instance_per_sec = processed_graphs / (time.time() - start_time)
        return loss, accuracies, precision, recall, f1, instance_per_sec
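
Every run_epoch here accumulates batch_metric * num_graphs and divides by processed_graphs at the end: a per-graph weighted mean that stays correct even when the final batch is smaller. A tiny self-contained check of the pattern, with illustrative numbers:

import numpy as np

batch_losses = [0.5, 0.2]   # mean loss reported per batch
batch_sizes = [10, 30]      # graphs in each batch

loss, processed_graphs = 0.0, 0
for batch_loss, num_graphs in zip(batch_losses, batch_sizes):
    loss += batch_loss * num_graphs
    processed_graphs += num_graphs
loss /= processed_graphs    # 0.275: each graph weighted equally

assert np.isclose(loss, np.average(batch_losses, weights=batch_sizes))
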
Example #13
0
    def test(self, testfile):
        if os.path.exists(self.best_model_file):
            self.restore_model(self.best_model_file)
        testdata, skipped_graphs = self.load_data(testfile, is_training_data=False)
        processed_graphs = 0
        accuracy_ops = [self.ops['accuracy_task%i' % task_id] for task_id in self.params['task_ids']]
        batch_iterator = ThreadedIterator(self.make_minibatch_iterator(testdata, False), max_queue_size=5)
        preds = []
        accs = []
        for step, batch_data in enumerate(batch_iterator):
            num_graphs = batch_data[self.placeholders['num_graphs']]
            processed_graphs += num_graphs

            batch_data[self.placeholders['out_layer_dropout_keep_prob']] = 1.0
            fetch_list = [self.ops['predicted_values'], accuracy_ops]

            result = self.sess.run(fetch_list, feed_dict=batch_data)
            (batch_pred, batch_accuracies) = (result[0], result[1])
            batch_pred = np.array(batch_pred)
            if len(batch_pred.shape) == 1:
                batch_pred = np.expand_dims(batch_pred, 1)
            preds.append(batch_pred.T)
            accs.append(np.array(batch_accuracies) * num_graphs)
        return np.concatenate(preds, 0), np.sum(accs, axis=0) / float(processed_graphs)
Example #14
0
    def run_epoch(self, epoch_name: str, data, is_training: bool):
        chemical_accuracies = np.array([
            0.066513725, 0.012235489, 0.071939046, 0.033730778, 0.033486113,
            0.004278493, 0.001330901, 0.004165489, 0.004128926, 0.00409976,
            0.004527465, 0.012292586, 0.037467458
        ])

        loss = 0
        average_precision = 0
        accuracies = []
        accuracy_ops = [
            self.ops['accuracy_task%i' % task_id]
            for task_id in self.params['task_ids']
        ]
        start_time = time.time()
        processed_graphs = 0
        batch_iterator = ThreadedIterator(self.make_minibatch_iterator(
            data, is_training),
                                          max_queue_size=5)
        for step, batch_data in enumerate(batch_iterator):
            num_graphs = batch_data[self.placeholders['num_graphs']]
            processed_graphs += num_graphs
            if is_training:
                batch_data[self.placeholders[
                    'out_layer_dropout_keep_prob']] = self.params[
                        'out_layer_dropout_keep_prob']
                #fetch_list = [self.ops['loss'], accuracy_ops, self.ops['target_values'], self.ops['pred_prob'], self.ops['optimizer']]
                #fetch_list = [self.ops['loss'], accuracy_ops, self.ops['optimizer']]
                fetch_list = [
                    self.ops['loss'], accuracy_ops, self.ops['train_step']
                ]
                #fetch_list = [self.ops['loss'], accuracy_ops, self.ops['optimizer'], self.ops['computed_values'], self.ops['target_values'], self.ops['prediction']]
                #fetch_list = [self.ops['loss'], accuracy_ops, self.ops['train_step'], self.ops['computed_values'], self.ops['target_values'], self.ops['prediction']]
            else:
                batch_data[
                    self.placeholders['out_layer_dropout_keep_prob']] = 1.0
                #fetch_list = [self.ops['loss'], accuracy_ops, self.ops['target_values'], self.ops['pred_prob']]
                fetch_list = [self.ops['loss'], accuracy_ops]
            result = self.sess.run(fetch_list, feed_dict=batch_data)
            (batch_loss, batch_accuracies) = (result[0], result[1])
            #(batch_loss, batch_accuracies, batch_target_values, batch_pred_prob) = (result[0], result[1], result[2], result[3])
            loss += batch_loss * num_graphs
            #ap = average_precision_score(batch_target_values, batch_pred_prob)
            #average_precision += ap * num_graphs
            accuracies.append(np.array(batch_accuracies) * num_graphs)

            print("Running %s, batch %i (has %i graphs). Loss so far: %.4f" %
                  (epoch_name, step, num_graphs, loss / processed_graphs),
                  end='\r')

            #x = self.sess.run(self.output)
            #prediction = result[-1]
            #target_values = result[-2]
            #computed_values = result[-3]
            #import pdb
            #pdb.set_trace()

        print("Num graphs proessed:", processed_graphs)
        #average_precision = average_precision / processed_graphs
        accuracies = np.sum(accuracies, axis=0) / processed_graphs
        loss = loss / processed_graphs
        error_ratios = accuracies / chemical_accuracies[
            self.params["task_ids"]]
        instance_per_sec = processed_graphs / (time.time() - start_time)
        return loss, average_precision, accuracies, error_ratios, instance_per_sec
Example #15
0
    def train(self, is_test):
        log_to_save = []
        total_time_start = time.time()
        summ_line = '%d\t%s\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f'
        line = 'loss:%.2f\tacc:%.2f\tprecision:%.2f\trecall:%.2f\tf1:%.2f\tspeed:%.2f'

        bak_train_data = []
        bak_valid_data = []
        with self.graph.as_default():
            if self.args.get('--restore') is not None:
                #valid_loss, valid_accs, valid_precision, valid_recall, valid_f1, valid_speed = self.run_epoch("Test (validation)", self.valid_data, False)
                #with open('./outputs/test.log', 'a') as f:
                #print(line%(valid_loss, valid_accs, valid_precision, valid_recall, valid_f1, valid_speed), file = f)
                _, valid_accs, _, _, _, _ = self.run_epoch(
                    "Resumed (validation)", self.valid_data, False, False)
                best_val_acc = np.sum(valid_accs)
                best_val_acc_epoch = 0
                print(
                    "\r\x1b[KResumed operation, initial cum. val. acc: %.5f" %
                    best_val_acc)
            else:
                (best_val_acc, best_val_acc_epoch) = (0., 0.)
            if not is_test:
                for epoch in range(1, self.params['num_epochs'] + 1):
                    train_loss, train_accs, train_precision, train_recall, train_f1, train_speed = self.run_epoch(
                        "epoch %i (training)" % epoch, self.train_data, True,
                        False)
                    valid_loss, valid_accs, valid_precision, valid_recall, valid_f1, valid_speed = self.run_epoch(
                        "epoch %i (validation)" % epoch, self.valid_data,
                        False, False)
                    epoch_time = time.time() - total_time_start
                    print(summ_line %
                          (epoch, self.params['train_file'], train_loss,
                           train_accs, train_precision, train_recall, train_f1,
                           train_speed, epoch_time))
                    print(summ_line %
                          (epoch, self.params['valid_file'], valid_loss,
                           valid_accs, valid_precision, valid_recall, valid_f1,
                           valid_speed, epoch_time))
                    #    with open('./outputs/train.log', 'a') as f:
                    #        print(line%(train_loss, train_accs, train_precision, train_recall, train_f1, train_speed), file = f)
                    #with open('./outputs/test.log', 'a') as f:
                    #    print(line%(valid_loss, valid_accs, valid_precision, valid_recall, valid_f1, valid_speed), file = f)

                    bak_train_data.append([
                        epoch, self.params['train_file'], train_loss,
                        np.sum(train_accs),
                        np.sum(train_precision),
                        np.sum(train_recall),
                        np.sum(train_f1), train_speed
                    ])
                    bak_valid_data.append([
                        epoch, self.params['valid_file'], valid_loss,
                        np.sum(valid_accs),
                        np.sum(valid_precision),
                        np.sum(valid_recall),
                        np.sum(valid_f1), valid_speed
                    ])

                    if not is_test:
                        val_acc = np.sum(valid_accs)  # type: float
                        if val_acc > best_val_acc:
                            self.save_model(self.best_model_file)
                            print(
                                "(Best epoch so far, cum. val. acc increased to %.5f from %.5f. Saving to '%s')"
                                %
                                (val_acc, best_val_acc, self.best_model_file))
                            best_val_acc = val_acc
                            best_val_acc_epoch = epoch
                    # elif epoch - best_val_acc_epoch >= self.params['patience']:
                    #     print("Stopping training after %i epochs without improvement on validation accuracy." % self.params['patience'])
                    #     break

                    if self.params['timeout'] < epoch_time:
                        print("Stopping training after %i epochs timeout." %
                              epoch)
                        break

        header = "epoch\tfile\tloss\taccs\tprecision\trecall\tf1\tspeed\n"
        if is_test:
            batch_iterator = ThreadedIterator(self.make_minibatch_iterator(
                self.valid_data, False),
                                              max_queue_size=5)
            for batch_data in batch_iterator:
                batch_data[
                    self.placeholders['out_layer_dropout_keep_prob']] = 1.0
                valid_loss, valid_accs, valid_precision, valid_recall, valid_f1, valid_speed = self.run_epoch(
                    "Test: ", batch_data, False, True)
                print("Test: %s\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f" %
                      (self.params['valid_file'], valid_loss, valid_accs,
                       valid_precision, valid_recall, valid_f1, valid_speed))
                #with open(self.online_data_backup_file + "_train_final.txt", "w") as f:
                #    f.write(header)
                #    for line in bak_train_data:
                #        f.write("\t".join([str(item) for item in line]) + "\n")
                with open(self.online_data_backup_file + "_test.txt",
                          "w") as f:
                    f.write("file\tloss\taccs\tprecision\trecall\tf1\tspeed\n")
                    f.write("\t".join([
                        self.params['valid_file'], valid_loss, valid_accs,
                        valid_precision, valid_recall, valid_f1, valid_speed
                    ]) + "\n")
        else:
            with open(self.online_data_backup_file + "_train.txt", "w") as f:
                f.write(header)
                for line in bak_train_data:
                    f.write("\t".join([str(item) for item in line]) + "\n")
            with open(self.online_data_backup_file + "_valid.txt", "w") as f:
                f.write(header)
                for line in bak_valid_data:
                    f.write("\t".join([str(item) for item in line]) + "\n")
Example #16
0
    def run_epoch(self, epoch_name: str, data, is_training: bool, is_test: bool):
        init_begin_time = time.time()
        loss = 0
        TP_all = []
        TN_all = []
        FP_all = []
        FN_all = []
        #accuracies = []
        #precision = []
        #recall = []
        #f1=[]
        #accuracy_ops = [self.ops['accuracy_task%i' % task_id] for task_id in self.params['task_ids']]
        #precision_ops = [self.ops['precision_task%i' % task_id] for task_id in self.params['task_ids']]
        #recall_ops = [self.ops['recall_task%i' % task_id] for task_id in self.params['task_ids']]
        #f1_ops = [self.ops['f1_task%i' % task_id] for task_id in self.params['task_ids']]
        TP_ops = [self.ops['TP%i' % task_id] for task_id in self.params['task_ids']]
        TN_ops = [self.ops['TN%i' % task_id] for task_id in self.params['task_ids']]
        FP_ops = [self.ops['FP%i' % task_id] for task_id in self.params['task_ids']]
        FN_ops = [self.ops['FN%i' % task_id] for task_id in self.params['task_ids']]

        
        start_time = time.time()
        processed_graphs = 0
        batch_iterator = ThreadedIterator(self.make_minibatch_iterator(data, is_training), max_queue_size=5)
        init_end_time = time.time()
        step_num = 0
        
        for step, batch_data in enumerate(batch_iterator):
            num_graphs = batch_data[self.placeholders['num_graphs']]
            processed_graphs += num_graphs
            run_begin_time = time.time()
            if is_training:
                batch_data[self.placeholders['out_layer_dropout_keep_prob']] = self.params['out_layer_dropout_keep_prob']
                #fetch_list = [self.ops['loss'], accuracy_ops, accuracy_ops, precision_ops, recall_ops, f1_ops, self.ops['train_step']]
                # train_step must be fetched during training or no parameter
                # update ever runs; the result[0..4] unpacking below is
                # unaffected by appending it.
                fetch_list = [TP_ops, TN_ops, FP_ops, FN_ops, self.ops['loss'],
                              self.ops['train_step']]
            else:
                batch_data[self.placeholders['out_layer_dropout_keep_prob']] = 1.0
                #fetch_list = [self.ops['loss'], accuracy_ops, precision_ops, recall_ops, f1_ops]
                fetch_list = [TP_ops, TN_ops, FP_ops, FN_ops, self.ops['loss']]
            result = self.sess.run(fetch_list, feed_dict=batch_data)
            run_end_time = time.time()
            #if is_training:
            #    train_vector = []
            #    target_vector = []
            #    res = self.sess.run([self.computed_values, self.prediction,self.tv,self.labels],feed_dict=batch_data)

                #res = self.sess.run([self.computed_values, self.tv],feed_dict=batch_data)
                #train_vector = res[0]
                #target_vector = res[1]

            #    computed_v = res[0]
            #    pred = res[1]
            #    target_v = res[2]
            #    labels = res[3]
            #    for i, array in enumerate(computed_v):
            #        train_vector.append(array[pred[i]])
            #    for i, array in enumerate(target_v):
            #        target_vector.append(array[labels[i]])
            #    with open('./outputs/train_vector.txt','a') as f:
            #        f.write(str(train_vector)+'\n')
            #    with open('./outputs/target_vector.txt','a') as f:
            #        f.write(str(target_vector)+'\n')
            #else:
                #vector = self.sess.run(self.computed_values,feed_dict=batch_data)
            #    vector = []
            #    for i, array in enumerate(self.sess.run(self.computed_values,feed_dict=batch_data)):
            #        indices = self.sess.run(self.prediction,feed_dict=batch_data)
            #        vector.append(array[indices[i]])
            #    with open('./outputs/vector.txt','a') as f:
            #        f.write(str(vector)+'\n')
            # gated_output = self.sess.run(self.gated_outputs,feed_dict=batch_data)
            step_num = step
            if not is_training and is_test:
                final_node = self.sess.run(self.compute_final_node_representations(),feed_dict=batch_data)
                final_node = np.mean(final_node, 0)
                np.save('./outputs/ggnn_vector/{}_test_final_node.npy'.format(self.valid_file), final_node)
            # np.save('./outputs/gated_output.npy', gated_output)

            #with open('./outputs/gated_outputs.txt', 'a') as f:
            #    f.write(str(gated_output)+'\n')

            (TP_batch, TN_batch, FP_batch, FN_batch, batch_loss) = (result[0], result[1], result[2], result[3], result[4]) 
            TP_all.append(TP_batch)
            TN_all.append(TN_batch)
            FP_all.append(FP_batch)
            FN_all.append(FN_batch)
            #(batch_loss, batch_accuracies, batch_precision, batch_recall, batch_f1) = (result[0], result[1], result[2], result[3], result[4])
            loss += batch_loss * num_graphs
            #accuracies.append(np.array(batch_accuracies) * num_graphs)
            #precision.append(np.array(batch_precision) * num_graphs)
            #recall.append(np.array(batch_recall) * num_graphs)
            #f1.append(np.array(batch_f1) * num_graphs)
        result_begin_time = time.time()
        # Sum the per-batch counts with NumPy: building fresh tf.reduce_sum
        # ops (and re-running the session several times) every epoch would
        # grow the graph and repeat work for no benefit.
        TP = float(np.sum(TP_all))
        TN = float(np.sum(TN_all))
        FP = float(np.sum(FP_all))
        FN = float(np.sum(FN_all))
        print('TP: ', TP, '\tTN: ', TN, '\tFP: ', FP, '\tFN: ', FN)
        #accuracies = np.sum(accuracies, axis=0) / processed_graphs
        #precision = np.sum(precision, axis=0) / processed_graphs
        #recall = np.sum(recall, axis=0) / processed_graphs
        #f1 = np.sum(f1, axis=0) / processed_graphs

        loss = loss / processed_graphs
        instance_per_sec = processed_graphs / (time.time() - start_time)
        accuracies = (TP + TN) / (TP + TN + FP + FN)
        precision = TP / (TP + FP)
        recall = TP / (TP + FN)
        f1 = 2 * precision * recall / (precision + recall)
        print(epoch_name)
        result_end_time = time.time()

        with open('./outputs/time.log', 'a') as f:
            out_str = 'init_time: {}\trun_once_time:{}\tloop_num:{}\tcompute_result_time: {}'.format(init_end_time-init_begin_time, run_end_time-run_begin_time, step_num, result_end_time-result_begin_time)
            f.write('---------- run epoch: ' + str(int(run_begin_time)) + '----------\n')
            f.write(out_str+'\n')
            print(out_str)
        #accuracies = list(accuracies.numpy())
        #precision = list(precision.numpy())
        #recall = list(recall.numpy())
        #f1 = list(f1.numpy())

        return loss, accuracies, precision, recall, f1, instance_per_sec, TP, TN, FP, FN
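
The metric computation above divides by TP + FP, TP + FN, and precision + recall, any of which can be zero on a degenerate epoch. A guarded variant one might substitute (a sketch, not part of the original code):

def prf_from_counts(tp, tn, fp, fn, eps=1e-12):
    # Confusion-matrix counts -> accuracy, precision, recall, F1, with a
    # small epsilon guarding against division by zero.
    accuracy = (tp + tn) / max(tp + tn + fp + fn, eps)
    precision = tp / max(tp + fp, eps)
    recall = tp / max(tp + fn, eps)
    f1 = 2 * precision * recall / max(precision + recall, eps)
    return accuracy, precision, recall, f1
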
Example #17
0
    def run_epoch(self,
                  epoch_name: str,
                  data,
                  is_training: bool,
                  start_step: int = 0):
        chemical_accuracies = np.array([
            0.066513725, 0.012235489, 0.071939046, 0.033730778, 0.033486113,
            0.004278493, 0.001330901, 0.004165489, 0.004128926, 0.00409976,
            0.004527465, 0.012292586, 0.037467458
        ])

        loss = 0
        accuracies = []
        accuracy_ops = [
            self.ops['accuracy_task%i' % task_id]
            for task_id in self.params['task_ids']
        ]
        start_time = time.time()
        processed_graphs = 0
        steps = 0
        acc_las, acc_uas = 0, 0
        acc_uas_e = 0
        batch_iterator = ThreadedIterator(self.make_minibatch_iterator(
            data, is_training),
                                          max_queue_size=5)
        all_labels, all_labels_e, all_computed_values, all_computed_values_e, \
        all_num_vertices, all_masks, all_masks_e, all_ids, all_adj_m = \
            [], [], [], [], [], [], [], [], []

        if self.params.get('is_test'):
            csv_file = open(self.test_results_file, 'w', newline='')
            writer = csv.writer(csv_file)
            row_headers = [
                'loc', 'token', 'LAS', 'UAS', 'label_acc', 'POS', 'dep',
                'dep_l', 'head', 'head_token', 'head_POS', 'head_dep',
                'head_dep_l', 'target_head', 'target_pos', 'target_dep',
                'target_dep_l', 'result_head', 'result_dep', 'result_dep_l',
                'active_nodes'
            ]
            writer.writerow(row_headers)
        else:
            csv_file = None
        for step, batch_data in enumerate(batch_iterator):
            num_graphs = batch_data[self.placeholders['num_graphs']]
            processed_graphs += num_graphs
            fetch_list_names = [
                'loss', 'accuracy_ops', 'summary', 'loss_edges', 'labels',
                'computed_values', 'final_node_representations', 'node_mask',
                'losses', 'edge_weights', 'edge_biases', 'num_vertices',
                'adjacency_matrix', 'sentences_id', 'word_inputs',
                'target_pos', 'computed_values_edges', 'labels_edges',
                'node_mask_edges', 'word_embeddings', 'emb_dropout_keep_prob'
            ]

            fetch_list = [
                self.ops['loss'], accuracy_ops, self.ops['summary'],
                self.ops['loss_edges'], self.ops['labels'],
                self.ops['computed_values'],
                self.ops['final_node_representations'], self.ops['node_mask'],
                self.ops['losses'], self.weights['edge_weights'],
                self.weights['edge_biases'], self.placeholders['num_vertices'],
                self.placeholders['adjacency_matrix'],
                self.placeholders['sentences_id'], self.ops['word_inputs'],
                self.placeholders['target_pos'],
                self.ops['computed_values_edges'],
                self.placeholders['target_values_edges'],
                self.placeholders['node_mask_edges'],
                self.weights['word_embeddings'],
                self.placeholders['emb_dropout_keep_prob']
            ]

            index_d = {
                fetch_list_names[i]: i
                for i in range(len(fetch_list_names))
            }
            if is_training:
                batch_data[self.placeholders[
                    'out_layer_dropout_keep_prob']] = self.params[
                        'out_layer_dropout_keep_prob']
                fetch_list.append(self.ops['train_step'])

            else:
                # Not training: self.ops['train_step'] is not fetched, so no parameter update runs.
                batch_data[
                    self.placeholders['out_layer_dropout_keep_prob']] = 1.0

            result = self.sess.run(fetch_list, feed_dict=batch_data)
            #TODO: delete

            loss_edges = result[index_d['loss_edges']]
            labels = result[index_d['labels']]
            computed_values = result[index_d['computed_values']]
            final_node_representations = result[
                index_d['final_node_representations']]
            node_mask = result[index_d['node_mask']]
            edge_weights = result[index_d['edge_weights']]
            edge_biases = result[index_d['edge_biases']]
            num_vertices = result[index_d['num_vertices']]
            adjacency_matrix = result[index_d['adjacency_matrix']]
            sentences_id = result[index_d['sentences_id']]
            word_inputs = result[index_d['word_inputs']]
            target_pos = result[index_d['target_pos']]
            computed_values_edges = result[index_d['computed_values_edges']]
            labels_edges = result[index_d['labels_edges']]
            node_mask_edges = result[index_d['node_mask_edges']]
            word_embeddings = result[index_d['word_embeddings']]
            emb_dropout_keep_prob = result[index_d['emb_dropout_keep_prob']]

            (batch_loss, batch_accuracies,
             batch_summary) = (result[0], result[1], result[2])
            if not self.params.get('is_test'):
                writer = self.train_writer if is_training else self.valid_writer
                writer.add_summary(batch_summary, start_step + step)
            loss += batch_loss * num_graphs
            accuracies.append(np.array(batch_accuracies) * num_graphs)

            try:
                word_inputs = batch_data[self.placeholders['word_inputs']]
                las, uas, uas_e = self.humanize_batch_results(
                    labels=labels,
                    computed_values=computed_values,
                    num_vertices=num_vertices,
                    mask=node_mask,
                    ids=sentences_id,
                    adms=adjacency_matrix,
                    labels_e=labels_edges,
                    computed_values_e=computed_values_edges,
                    mask_edges=node_mask_edges,
                    word_inputs=word_inputs,
                    target_pos=target_pos,
                    out_file=csv_file)

                acc_las += las * num_graphs
                acc_uas += uas * num_graphs
                acc_uas_e += uas_e * num_graphs

            except Exception:
                print('edge weights: %s' % edge_weights)
                print('edge bias: %s' % edge_biases)
                raise Exception('Apparent division by zero, comp_values: %s' %
                                computed_values[0])

            print("Running %s, batch %i (has %i graphs). Loss so far: %.4f" %
                  (epoch_name, step, num_graphs, loss / processed_graphs),
                  end='\r')
            steps += 1

            all_labels.append(labels)
            all_labels_e.append(labels_edges)
            all_computed_values.append(computed_values)
            all_computed_values_e.append(computed_values_edges)
            all_num_vertices.append(num_vertices)
            all_masks.append(node_mask)
            all_masks_e.append(node_mask_edges)
            all_ids.append(sentences_id)
            all_adj_m.append(adjacency_matrix)

        accuracies = np.sum(accuracies, axis=0) / processed_graphs
        loss = loss / processed_graphs
        error_ratios = accuracies / chemical_accuracies[
            self.params["task_ids"]]
        instance_per_sec = processed_graphs / (time.time() - start_time)
        acc_las = acc_las / processed_graphs
        acc_uas = acc_uas / processed_graphs
        acc_uas_e = acc_uas_e / processed_graphs

        return loss, accuracies, error_ratios, instance_per_sec, steps, acc_las, acc_uas, \
               all_labels, all_computed_values, all_num_vertices, all_masks, \
               all_ids, all_adj_m, all_labels_e, all_computed_values_e, all_masks_e, acc_uas_e
    def run_epoch(self, epoch_name: str, data, is_training: bool):
        chemical_accuracies = np.array([
            0.066513725, 0.012235489, 0.071939046, 0.033730778, 0.033486113,
            0.004278493, 0.001330901, 0.004165489, 0.004128926, 0.00409976,
            0.004527465, 0.012292586, 0.037467458
        ])

        loss = 0
        accuracies = []
        start_time = time.time()
        processed_graphs = 0
        accuracy_ops = [
            self.ops['accuracy_task%i' % task_id]
            for task_id in self.params['task_ids']
        ]
        batch_iterator = ThreadedIterator(self.make_minibatch_iterator(
            data, is_training),
                                          max_queue_size=5)
        for step, batch_data in enumerate(batch_iterator):
            num_graphs = batch_data[self.placeholders['num_graphs']]
            processed_graphs += num_graphs
            if is_training:
                batch_data[self.placeholders[
                    'out_layer_dropout_keep_prob']] = self.params[
                        'out_layer_dropout_keep_prob']
                fetch_list = [
                    self.ops['loss'], accuracy_ops, self.ops['train_step']
                ]
            else:
                batch_data[
                    self.placeholders['out_layer_dropout_keep_prob']] = 1.0
                fetch_list = [self.ops['loss'], accuracy_ops]
            val_1, val_2, val_3, val_4, val_5, val_6 = self.sess.run(
                [
                    self.ops['sigm_c'], self.ops['sigm_TP'],
                    self.ops['sigm_FN'], self.ops['sigm_FP'],
                    self.ops['sigm_TN'], self.ops['sigm_sum']
                ],
                feed_dict=batch_data)
            val_R, val_P, val_F1, val_FPR = self.sess.run(
                [
                    self.ops['sigm_Recall'], self.ops['sigm_Precision'],
                    self.ops['sigm_F1'], self.ops['sigm_FPR']
                ],
                feed_dict=batch_data)

            result = self.sess.run(fetch_list, feed_dict=batch_data)
            (batch_loss, batch_accuracies) = (result[0], result[1])
            loss += batch_loss * num_graphs
            accuracies.append(np.array(batch_accuracies) * num_graphs)

            print("random seed: {}".format(self.random_seed))
            print("sum: {}".format(val_6))
            print("TP: {}".format(val_2))
            print("FN: {}".format(val_3))
            print("FP: {}".format(val_4))
            print("TN: {}".format(val_5))
            print("Recall: {}".format(val_R))
            print("Precision: {}".format(val_P))
            print("F1: {}".format(val_F1))
            print("FPR: {}".format(val_FPR))
            print("Running %s, batch %i (has %i graphs). "
                  "Loss so far: %.4f" %
                  (epoch_name, step, num_graphs, loss / processed_graphs),
                  end='\r')

        accuracies = np.sum(accuracies, axis=0) / processed_graphs
        loss = loss / processed_graphs
        error_ratios = accuracies / chemical_accuracies[
            self.params["task_ids"]]
        instance_per_sec = processed_graphs / (time.time() - start_time)
        return loss, accuracies, error_ratios, instance_per_sec