Example #1
    def tune_model(self):
        """Function to tune the model."""
        acc = 0

        generator_config = GeneratorConfig(
            data=self.trainon,
            algo=self.model.model_name,
            batch_size=self.model.config.batch_size)
        self.gen_train = Generator(config=generator_config,
                                   model_config=self.model.config)

        self.evaluator = Evaluation(model=self.model,
                                    data_type=self.teston,
                                    debug=self.debug,
                                    tuning=True,
                                    session=self.sess)

        for n_iter in range(self.config.epochs):
            self.train_model_epoch(n_iter)

        self.gen_train.stop()
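        # The final test batch pushes its accuracy onto the evaluator's
        # output queue, which is read below as the tuning result.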
        self.evaluator.test_batch(n_iter)
        acc = self.evaluator.output_queue.get()
        self.evaluator.stop()
        self.sess.close()
        tf.reset_default_graph()  # Reset the default graph for the next training task.

        return acc
Example #2
    def train_model(self):
        """Function to train the model."""
        loss = 0

        if self.config.loadFromData:
            self.load_model()
        else:
            generator_config = GeneratorConfig(
                data=self.trainon,
                algo=self.model.model_name,
                batch_size=self.model.config.batch_size)
            self.gen_train = Generator(config=generator_config,
                                       model_config=self.model.config)

            if not self.tuning:
                self.evaluator = Evaluation(model=self.model,
                                            data_type=self.teston,
                                            debug=self.debug,
                                            session=self.sess)

            for n_iter in range(self.config.epochs):
                loss = self.train_model_epoch(n_iter)
                if not self.tuning:
                    self.test(n_iter)

            self.gen_train.stop()

            if not self.tuning:
                self.evaluator.save_training_result(self.training_results)
                self.evaluator.stop()

            if self.config.save_model:
                self.save_model()

        if self.config.disp_result:
            self.display()

        if self.config.disp_summary:
            self.summary()
            self.summary_hyperparameter()

        if not os.path.exists("./tmp"):
            os.mkdir("./tmp")

        save_path = self.saver.save(self.sess, "./tmp/model.ckpt")
        self.sess.close()
        tf.reset_default_graph()  # Reset the default graph for the next training task.

        return loss
Example #3
    def train_model(self):
        """Function to train the model."""
        loss = 0

        if self.config.loadFromData:
            self.load_model()
        else:
            self.gen_train = Generator(config=self.generator_config,
                                       model_config=self.model.config)

            if not self.tuning:
                self.evaluator = Evaluation(model=self.model,
                                            data_type=self.teston,
                                            debug=self.debug,
                                            session=self.sess)

            for n_iter in range(self.config.epochs):
                loss = self.train_model_epoch(n_iter)
                if not self.tuning:
                    self.test(n_iter)

            self.gen_train.stop()

            if not self.tuning:
                self.evaluator.save_training_result(self.training_results)
                self.evaluator.stop()

            if self.config.save_model:
                self.save_model()

        if self.config.disp_result:
            self.display()

        if self.config.disp_summary:
            self.summary()
            self.summary_hyperparameter()

        self.export_embeddings()

        self.sess.close()
        tf.reset_default_graph()  # Reset the default graph for the next training task.

        return loss
Example #4
    def test(self, curr_epoch):
        """function to test the model.
           
           Args:
                curr_epoch (int): The current epoch number.
        """
        if not self.evaluator:
            self.evaluator = Evaluation(model=self.model,
                                        data_type=self.teston,
                                        debug=self.debug,
                                        session=self.sess)

        if not self.config.full_test_flag and (
                curr_epoch % self.config.test_step == 0 or curr_epoch == 0
                or curr_epoch == self.config.epochs - 1):
            self.evaluator.test_batch(curr_epoch)
        elif curr_epoch == self.config.epochs - 1:
            self.evaluator.test_batch(curr_epoch)
Example #5
class Trainer(TrainerMeta):
    """Class for handling the training of the algorithms.

        Args:
            model (object): Model object
            debug (bool): Flag indicating whether to run in debug mode
            tuning (bool): Flag denoting hyperparameter tuning if True

        Examples:
            >>> from pykg2vec.utils.trainer import Trainer
            >>> from pykg2vec.core.TransE import TransE
            >>> trainer = Trainer(model=TransE(), debug=False)
            >>> trainer.build_model()
            >>> trainer.train_model()
    """
    def __init__(self,
                 model,
                 trainon='train',
                 teston='valid',
                 debug=False,
                 tuning=False):
        self.debug = debug
        self.model = model
        self.config = self.model.config
        self.training_results = []
        self.gen_train = None
        self.tuning = tuning
        self.trainon = trainon
        self.teston = teston

    def infer_tails(self, h, r, sess, topk=5):
        tails = self.model.infer_tails(h, r, topk)
        self.saver.restore(sess, "./tmp/model.ckpt")
        tails = tails.eval(session=sess)
        print("\n(head, relation)->({},{}) :: Inferred tails->({})\n".format(
            h, r, ",".join([str(i) for i in tails])))

    def infer_heads(self, r, t, sess, topk=5):
        heads = self.model.infer_heads(r, t, topk)
        self.saver.restore(sess, "./tmp/model.ckpt")
        heads = heads.eval(session=sess)
        print("\n(relation,tail)->({},{}) :: Inferred heads->({})\n".format(
            r, t, ",".join([str(i) for i in heads])))

    def build_model(self):
        """function to build the model"""
        self.model.def_inputs()
        self.model.def_parameters()
        if getattr(self.model, "def_layer", None):
            self.model.def_layer()
        self.model.def_loss()

        if not self.debug:
            self.sess = tf.Session(config=self.config.gpu_config)
        else:
            self.sess = tf.InteractiveSession()
        self.global_step = tf.Variable(0, name="global_step", trainable=False)

        if self.config.optimizer == 'sgd':
            optimizer = tf.train.GradientDescentOptimizer(
                learning_rate=self.config.learning_rate)
        elif self.config.optimizer == 'rms':
            optimizer = tf.train.RMSPropOptimizer(
                learning_rate=self.config.learning_rate)
        elif self.config.optimizer == 'adam':
            optimizer = tf.train.AdamOptimizer(
                learning_rate=self.config.learning_rate)
        elif self.config.optimizer == 'adagrad':
            optimizer = tf.train.AdagradOptimizer(
                learning_rate=self.config.learning_rate)
        else:
            raise NotImplementedError("No support for %s optimizer" %
                                      self.config.optimizer)

        grads = optimizer.compute_gradients(self.model.loss)
        self.op_train = optimizer.apply_gradients(grads,
                                                  global_step=self.global_step)
        self.sess.run(tf.global_variables_initializer())
        self.saver = tf.train.Saver()
        if not self.tuning:
            self.summary()
            self.summary_hyperparameter()

    ''' Training related functions:'''

    def train_model(self):
        """Function to train the model."""
        loss = 0

        if self.config.loadFromData:
            self.load_model()
        else:
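            # Build a fresh batch generator over the training split; it runs
            # background worker processes and is stopped via gen_train.stop()
            # once training finishes.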
            generator_config = GeneratorConfig(
                data=self.trainon,
                algo=self.model.model_name,
                batch_size=self.model.config.batch_size)
            self.gen_train = Generator(config=generator_config,
                                       model_config=self.model.config)

            if not self.tuning:
                self.evaluator = Evaluation(model=self.model,
                                            data_type=self.teston,
                                            debug=self.debug,
                                            session=self.sess)

            for n_iter in range(self.config.epochs):
                loss = self.train_model_epoch(n_iter)
                if not self.tuning:
                    self.test(n_iter)

            self.gen_train.stop()

            if not self.tuning:
                self.evaluator.save_training_result(self.training_results)
                self.evaluator.stop()

            if self.config.save_model:
                self.save_model()

        if self.config.disp_result:
            self.display()

        if self.config.disp_summary:
            self.summary()
            self.summary_hyperparameter()

        if not os.path.exists("./tmp"):
            os.mkdir("./tmp")

        save_path = self.saver.save(self.sess, "./tmp/model.ckpt")
        self.sess.close()
        tf.reset_default_graph()  # Reset the default graph for the next training task.

        return loss

    def tune_model(self):
        """Function to tune the model."""
        acc = 0

        generator_config = GeneratorConfig(
            data=self.trainon,
            algo=self.model.model_name,
            batch_size=self.model.config.batch_size)
        self.gen_train = Generator(config=generator_config,
                                   model_config=self.model.config)

        self.evaluator = Evaluation(model=self.model,
                                    data_type=self.teston,
                                    debug=self.debug,
                                    tuning=True,
                                    session=self.sess)

        for n_iter in range(self.config.epochs):
            self.train_model_epoch(n_iter)

        self.gen_train.stop()
        self.evaluator.test_batch(n_iter)
        acc = self.evaluator.output_queue.get()
        self.evaluator.stop()
        self.sess.close()
        tf.reset_default_graph()  # Reset the default graph for the next training task.

        return acc

    def train_model_epoch(self, epoch_idx):
        """Function to train the model for one epoch."""
        acc_loss = 0

        # Run only a few batches per epoch when debugging.
        num_batch = (self.model.config.kg_meta.tot_train_triples //
                     self.config.batch_size if not self.debug else 10)

        start_time = timeit.default_timer()

        for batch_idx in range(num_batch):
            data = list(next(self.gen_train))
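            # The listed models consume (h, r, t) batches with hr_t/rt_h
            # multi-label targets; all other models consume paired
            # positive/negative triples.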
            if self.model.model_name.lower() in [
                    "tucker", "tucker_v2", "conve", "convkb", "complex",
                    "distmult", "proje_pointwise"
            ]:
                h = data[0]
                r = data[1]
                t = data[2]
                hr_t = data[3]
                rt_h = data[4]

                feed_dict = {
                    self.model.h: h,
                    self.model.r: r,
                    self.model.t: t,
                    self.model.hr_t: hr_t,
                    self.model.rt_h: rt_h
                }
            else:
                ph = data[0]
                pr = data[1]
                pt = data[2]
                nh = data[3]
                nr = data[4]
                nt = data[5]

                feed_dict = {
                    self.model.pos_h: ph,
                    self.model.pos_t: pt,
                    self.model.pos_r: pr,
                    self.model.neg_h: nh,
                    self.model.neg_t: nt,
                    self.model.neg_r: nr
                }
            _, step, loss = self.sess.run(
                [self.op_train, self.global_step, self.model.loss], feed_dict)

            acc_loss += loss

            if not self.tuning:
                print('[%.2f sec](%d/%d): -- loss: %.5f' %
                      (timeit.default_timer() - start_time, batch_idx,
                       num_batch, loss),
                      end='\r')
        if not self.tuning:
            print('iter[%d] ---Train Loss: %.5f ---time: %.2f' %
                  (epoch_idx, acc_loss, timeit.default_timer() - start_time))

        self.training_results.append([epoch_idx, acc_loss])

        return acc_loss

    ''' Testing related functions:'''

    def test(self, curr_epoch):
        """function to test the model.
           
           Args:
                curr_epoch (int): The current epoch number.
        """

        if not self.config.full_test_flag and (
                curr_epoch % self.config.test_step == 0 or curr_epoch == 0
                or curr_epoch == self.config.epochs - 1):
            self.evaluator.test_batch(curr_epoch)
        elif curr_epoch == self.config.epochs - 1:
            self.evaluator.test_batch(curr_epoch)

    ''' Procedural functions:'''

    def save_model(self):
        """Function to save the model."""
        saved_path = self.config.tmp / self.model.model_name
        saved_path.mkdir(parents=True, exist_ok=True)

        saver = tf.train.Saver(self.model.parameter_list)
        saver.save(self.sess, str(saved_path / 'model.vec'))

    def load_model(self):
        """Function to load the model."""
        saved_path = self.config.tmp / self.model.model_name
        if saved_path.exists():
            saver = tf.train.Saver(self.model.parameter_list)
            saver.restore(self.sess, str(saved_path / 'model.vec'))

    def display(self):
        """Function to display embedding."""
        options = {
            "ent_only_plot": True,
            "rel_only_plot": not self.config.plot_entity_only,
            "ent_and_rel_plot": not self.config.plot_entity_only
        }

        if self.config.plot_embedding:
            viz = Visualization(model=self.model,
                                vis_opts=options,
                                sess=self.sess)

            viz.plot_embedding(resultpath=self.config.figures,
                               algos=self.model.model_name,
                               show_label=False)

        if self.config.plot_training_result:
            viz = Visualization(model=self.model, sess=self.sess)
            viz.plot_train_result()

        if self.config.plot_testing_result:
            viz = Visualization(model=self.model, sess=self.sess)
            viz.plot_test_result()

    def export_embeddings(self):
        """Export embeddings in tsv format."""

        if not self.model:
            raise NotImplementedError('Please provide a model!')

        save_path = self.config.embeddings / self.model.model_name
        save_path.mkdir(parents=True, exist_ok=True)

        # export entity embeddings
        idx2entity = self.model.config.knowledge_graph.read_cache_data(
            'idx2entity')
        self.save_tsv_(save_path, "ent_vecs.tsv", "ent_meta.tsv",
                       self.model.ent_embeddings, idx2entity)

        # export relation embeddings
        idx2relation = self.model.config.knowledge_graph.read_cache_data(
            'idx2relation')
        self.save_tsv_(save_path, "rel_vecs.tsv", "rel_meta.tsv",
                       self.model.rel_embeddings, idx2relation)

    def save_tsv_(self, save_path, vec_fname, meta_fname, embeddings, names):
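        """Write the given embeddings to a TSV of vectors plus a TSV of labels."""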
        emb_tensor = tf.nn.embedding_lookup(embeddings, list(names.keys()))
        emb_array = self.sess.run(emb_tensor)
        with open(str(save_path / vec_fname), 'w') as out_v, \
             open(str(save_path / meta_fname), 'w') as out_m:
            for idx in names:
                out_m.write(names[idx] + "\n")
                out_v.write("\t".join([str(x) for x in emb_array[idx]]) + "\n")

    def summary(self):
        """Function to print the summary."""
        print("\n------------------Global Setting--------------------")
        # Pad keys to the length of the longest key plus 20 extra spaces.
        maxspace = max(len(k) for k in self.config.__dict__.keys()) + 20
        for key, val in self.config.__dict__.items():
            if key in self.config.__dict__['hyperparameters']:
                continue

            if isinstance(val, (KGMetaData, KnowledgeGraph)) or key.startswith(
                    'gpu') or key.startswith('hyperparameters'):
                continue

            print("%s : %s" % (key.rjust(maxspace), val))
        print("---------------------------------------------------")

    def summary_hyperparameter(self):
        """Function to print the hyperparameter summary."""
        print("\n-----------%s Hyperparameter Setting-------------" %
              (self.model.model_name))
        maxspace = max(len(k) for k in self.config.hyperparameters.keys()) + 15
        for key, val in self.config.hyperparameters.items():
            print("%s : %s" % (key.rjust(maxspace), val))
        print("---------------------------------------------------")
Example #6
class Trainer(TrainerMeta):
    """Class for handling the training of the algorithms.

        Args:
            model (object): Model object
            debug (bool): Flag indicating whether to run in debug mode
            tuning (bool): Flag denoting hyperparameter tuning if True

        Examples:
            >>> from pykg2vec.utils.trainer import Trainer
            >>> from pykg2vec.core.TransE import TransE
            >>> trainer = Trainer(model=TransE(), debug=False)
            >>> trainer.build_model()
            >>> trainer.train_model()
    """
    def __init__(self,
                 model,
                 trainon='train',
                 teston='test',
                 debug=False,
                 tuning=False):
        self.debug = debug
        self.model = model
        self.config = self.model.config
        self.training_results = []
        self.gen_train = None
        self.tuning = tuning
        self.trainon = trainon
        self.teston = teston

        self.evaluator = None

    def build_model(self):
        """function to build the model"""
        self.model.def_inputs()
        self.model.def_parameters()
        if getattr(self.model, "def_layer", None):
            self.model.def_layer()
        self.model.def_loss()

        if not self.debug:
            self.sess = tf.Session(config=self.config.gpu_config)
        else:
            self.sess = tf.InteractiveSession()
        self.global_step = tf.Variable(0, name="global_step", trainable=False)

        if self.config.optimizer == 'sgd':
            optimizer = tf.train.GradientDescentOptimizer(
                learning_rate=self.config.learning_rate)
        elif self.config.optimizer == 'rms':
            optimizer = tf.train.RMSPropOptimizer(
                learning_rate=self.config.learning_rate)
        elif self.config.optimizer == 'adam':
            optimizer = tf.train.AdamOptimizer(
                learning_rate=self.config.learning_rate)
        elif self.config.optimizer == 'adagrad':
            optimizer = tf.train.AdagradOptimizer(
                learning_rate=self.config.learning_rate)
        elif self.config.optimizer == 'adadelta':
            optimizer = tf.train.AdadeltaOptimizer(
                learning_rate=self.config.learning_rate)
        else:
            raise NotImplementedError("No support for %s optimizer" %
                                      self.config.optimizer)

        grads = optimizer.compute_gradients(self.model.loss)
        self.op_train = optimizer.apply_gradients(grads,
                                                  global_step=self.global_step)
        self.sess.run(tf.global_variables_initializer())
        self.saver = tf.train.Saver()
        if not self.tuning:
            self.summary()
            self.summary_hyperparameter()

        self.generator_config = GeneratorConfig(
            data=self.trainon,
            algo=self.model.model_name,
            batch_size=self.model.config.batch_size,
            process_num=self.model.config.num_process_gen,
            neg_rate=self.config.neg_rate)
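        # Built once here so that train_model() and tune_model() below can
        # share the same generator configuration.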

    ''' Training related functions:'''

    def train_model(self):
        """Function to train the model."""
        loss = 0

        if self.config.loadFromData:
            self.load_model()
        else:
            self.gen_train = Generator(config=self.generator_config,
                                       model_config=self.model.config)

            if not self.tuning:
                self.evaluator = Evaluation(model=self.model,
                                            data_type=self.teston,
                                            debug=self.debug,
                                            session=self.sess)

            for n_iter in range(self.config.epochs):
                loss = self.train_model_epoch(n_iter)
                if not self.tuning:
                    self.test(n_iter)

            self.gen_train.stop()

            if not self.tuning:
                self.evaluator.save_training_result(self.training_results)
                self.evaluator.stop()

            if self.config.save_model:
                self.save_model()

        if self.config.disp_result:
            self.display()

        if self.config.disp_summary:
            self.summary()
            self.summary_hyperparameter()

        self.export_embeddings()

        self.sess.close()
        tf.reset_default_graph()  # Reset the default graph for the next training task.

        return loss

    def tune_model(self):
        """Function to tune the model."""
        acc = 0

        self.gen_train = Generator(config=self.generator_config,
                                   model_config=self.model.config)

        self.evaluator = Evaluation(model=self.model,
                                    data_type=self.teston,
                                    debug=self.debug,
                                    tuning=True,
                                    session=self.sess)

        for n_iter in range(self.config.epochs):
            self.train_model_epoch(n_iter)

        self.gen_train.stop()
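        # Read the tuning accuracy produced by the final test batch from the
        # evaluator's output queue.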
        self.evaluator.test_batch(n_iter)
        acc = self.evaluator.output_queue.get()
        self.evaluator.stop()
        self.sess.close()
        tf.reset_default_graph()  # Reset the default graph for the next training task.

        return acc

    def train_model_epoch(self, epoch_idx):
        """Function to train the model for one epoch."""
        acc_loss = 0

        # Run only a few batches per epoch when debugging.
        num_batch = (self.model.config.kg_meta.tot_train_triples //
                     self.config.batch_size if not self.debug else 10)

        start_time = timeit.default_timer()

        for batch_idx in range(num_batch):
            data = list(next(self.gen_train))
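            # The listed models take (h, r, t) batches with hr_t/rt_h
            # multi-label targets; the rest take positive/negative triple pairs.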
            if self.model.model_name.lower() in [
                    "tucker", "tucker_v2", "conve", "convkb", "proje_pointwise"
            ]:
                h = data[0]
                r = data[1]
                t = data[2]
                hr_t = data[3]
                rt_h = data[4]

                feed_dict = {
                    self.model.h: h,
                    self.model.r: r,
                    self.model.t: t,
                    self.model.hr_t: hr_t,
                    self.model.rt_h: rt_h
                }
            else:
                ph = data[0]
                pr = data[1]
                pt = data[2]
                nh = data[3]
                nr = data[4]
                nt = data[5]

                feed_dict = {
                    self.model.pos_h: ph,
                    self.model.pos_t: pt,
                    self.model.pos_r: pr,
                    self.model.neg_h: nh,
                    self.model.neg_t: nt,
                    self.model.neg_r: nr
                }
            _, step, loss = self.sess.run(
                [self.op_train, self.global_step, self.model.loss], feed_dict)

            acc_loss += loss

            if not self.tuning:
                print('[%.2f sec](%d/%d): -- loss: %.5f' %
                      (timeit.default_timer() - start_time, batch_idx,
                       num_batch, loss),
                      end='\r')
        if not self.tuning:
            print('iter[%d] ---Train Loss: %.5f ---time: %.2f' %
                  (epoch_idx, acc_loss, timeit.default_timer() - start_time))

        self.training_results.append([epoch_idx, acc_loss])

        return acc_loss

    ''' Testing related functions:'''

    def test(self, curr_epoch):
        """function to test the model.
           
           Args:
                curr_epoch (int): The current epoch number.
        """
        if not self.evaluator:
            self.evaluator = Evaluation(model=self.model,
                                        data_type=self.teston,
                                        debug=self.debug,
                                        session=self.sess)

        if not self.config.full_test_flag and (
                curr_epoch % self.config.test_step == 0 or curr_epoch == 0
                or curr_epoch == self.config.epochs - 1):
            self.evaluator.test_batch(curr_epoch)
        elif curr_epoch == self.config.epochs - 1:
            self.evaluator.test_batch(curr_epoch)

    ''' Interactive Inference related '''

    def enter_interactive_mode(self):
        self.build_model()
        self.load_model()

        print(
            "The training/loading of the model has finished!\nNow enter interactive mode :)"
        )
        print("-----")
        print("Example 1: trainer.infer_tails(1,10,topk=5)")
        self.infer_tails(1, 10, topk=5)

        print("-----")
        print("Example 2: trainer.infer_heads(10,20,topk=5)")
        self.infer_heads(10, 20, topk=5)

        print("-----")
        print("Example 3: trainer.infer_rels(1,20,topk=5)")
        self.infer_rels(1, 20, topk=5)

    def exit_interactive_mode(self):
        self.sess.close()
        tf.reset_default_graph()  # Reset the default graph for the next training task.

        print("Thank you for trying out inference interactive script :)")

    def infer_tails(self, h, r, topk=5):
        tails_op = self.model.infer_tails(h, r, topk)
        tails = self.sess.run(tails_op)
        print("\n(head, relation)->({},{}) :: Inferred tails->({})\n".format(
            h, r, ",".join([str(i) for i in tails])))
        idx2ent = self.model.config.knowledge_graph.read_cache_data(
            'idx2entity')
        idx2rel = self.model.config.knowledge_graph.read_cache_data(
            'idx2relation')
        print("head: %s" % idx2ent[h])
        print("relation: %s" % idx2rel[r])

        for idx, tail in enumerate(tails):
            print("%dth predicted tail: %s" % (idx, idx2ent[tail]))

        return {tail: idx2ent[tail] for tail in tails}

    def infer_heads(self, r, t, topk=5):
        heads_op = self.model.infer_heads(r, t, topk)
        heads = self.sess.run(heads_op)

        print("\n(relation,tail)->({},{}) :: Inferred heads->({})\n".format(
            t, r, ",".join([str(i) for i in heads])))
        idx2ent = self.model.config.knowledge_graph.read_cache_data(
            'idx2entity')
        idx2rel = self.model.config.knowledge_graph.read_cache_data(
            'idx2relation')
        print("tail: %s" % idx2ent[t])
        print("relation: %s" % idx2rel[r])

        for idx, head in enumerate(heads):
            print("%dth predicted head: %s" % (idx, idx2ent[head]))

        return {head: idx2ent[head] for head in heads}

    def infer_rels(self, h, t, topk=5):
        rels_op = self.model.infer_rels(h, t, topk)
        rels = self.sess.run(rels_op)

        print("\n(head,tail)->({},{}) :: Inferred rels->({})\n".format(
            h, t, ",".join([str(i) for i in rels])))
        idx2ent = self.model.config.knowledge_graph.read_cache_data(
            'idx2entity')
        idx2rel = self.model.config.knowledge_graph.read_cache_data(
            'idx2relation')
        print("head: %s" % idx2ent[h])
        print("tail: %s" % idx2ent[t])

        for idx, rel in enumerate(rels):
            print("%dth predicted rel: %s" % (idx, idx2rel[rel]))

        return {rel: idx2rel[rel] for rel in rels}

    ''' Procedural functions:'''

    def save_model(self):
        """Function to save the model."""
        saved_path = self.config.path_tmp / self.model.model_name
        saved_path.mkdir(parents=True, exist_ok=True)

        saver = tf.train.Saver(self.model.parameter_list)
        saver.save(self.sess, str(saved_path / 'model.vec'))

    def load_model(self):
        """Function to load the model."""
        saved_path = self.config.path_tmp / self.model.model_name
        if saved_path.exists():
            saver = tf.train.Saver(self.model.parameter_list)
            saver.restore(self.sess, str(saved_path / 'model.vec'))

    def display(self):
        """Function to display embedding."""
        options = {
            "ent_only_plot": True,
            "rel_only_plot": not self.config.plot_entity_only,
            "ent_and_rel_plot": not self.config.plot_entity_only
        }

        if self.config.plot_embedding:
            viz = Visualization(model=self.model,
                                vis_opts=options,
                                sess=self.sess)

            viz.plot_embedding(resultpath=self.config.figures,
                               algos=self.model.model_name,
                               show_label=False)

        if self.config.plot_training_result:
            viz = Visualization(model=self.model, sess=self.sess)
            viz.plot_train_result()

        if self.config.plot_testing_result:
            viz = Visualization(model=self.model, sess=self.sess)
            viz.plot_test_result()

    def export_embeddings(self):
        """
            Export embeddings in tsv and pandas pickled format. 
            With tsvs (both label, vector files), you can:
            1) Use those pretained embeddings for your applications.  
            2) Visualize the embeddings in this website to gain insights. (https://projector.tensorflow.org/)

            Pandas dataframes can be read with pd.read_pickle('desired_file.pickle')
        """
        save_path = self.config.path_embeddings / self.model.model_name
        save_path.mkdir(parents=True, exist_ok=True)

        idx2ent = self.model.config.knowledge_graph.read_cache_data(
            'idx2entity')
        idx2rel = self.model.config.knowledge_graph.read_cache_data(
            'idx2relation')

        series_ent = pd.Series(idx2ent)
        series_rel = pd.Series(idx2rel)
        series_ent.to_pickle(save_path / "ent_labels.pickle")
        series_rel.to_pickle(save_path / "rel_labels.pickle")

        with open(str(save_path / "ent_labels.tsv"), 'w') as l_export_file:
            for label in idx2ent.values():
                l_export_file.write(label + "\n")

        with open(str(save_path / "rel_labels.tsv"), 'w') as l_export_file:
            for label in idx2rel.values():
                l_export_file.write(label + "\n")

        for parameter in self.model.parameter_list:
            all_ids = list(range(0, int(parameter.shape[0])))
            stored_name = parameter.name.split(':')[0]

            # Only 2-D parameters (embedding matrices) are exported.
            if len(parameter.shape) == 2:
                op_get_all_embs = tf.nn.embedding_lookup(parameter, all_ids)
                all_embs = self.sess.run(op_get_all_embs)
                with open(str(save_path / ("%s.tsv" % stored_name)),
                          'w') as v_export_file:
                    for idx in all_ids:
                        v_export_file.write(
                            "\t".join([str(x) for x in all_embs[idx]]) + "\n")

                df = pd.DataFrame(all_embs)
                df.to_pickle(save_path / ("%s.pickle" % stored_name))
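
        # A minimal reload sketch (an assumption, not part of this class): the
        # pickles written above can be read back with pandas, e.g.
        #   import pandas as pd
        #   ent_labels = pd.read_pickle(save_path / "ent_labels.pickle")
        #   vecs = pd.read_pickle(save_path / "<parameter_name>.pickle")
        # where <parameter_name> is the TF variable name used as stored_name above.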

    def summary(self):
        """Function to print the summary."""
        print("\n------------------Global Setting--------------------")
        # Pad keys to the length of the longest key plus 20 extra spaces.
        maxspace = max(len(k) for k in self.config.__dict__.keys()) + 20
        for key, val in self.config.__dict__.items():
            if key in self.config.__dict__['hyperparameters']:
                continue

            if isinstance(val, (KGMetaData, KnowledgeGraph)) or key.startswith(
                    'gpu') or key.startswith('hyperparameters'):
                continue

            print("%s : %s" % (key.rjust(maxspace), val))
        print("---------------------------------------------------")

    def summary_hyperparameter(self):
        """Function to print the hyperparameter summary."""
        print("\n-----------%s Hyperparameter Setting-------------" %
              (self.model.model_name))
        maxspace = max(len(k) for k in self.config.hyperparameters.keys()) + 15
        for key, val in self.config.hyperparameters.items():
            print("%s : %s" % (key.rjust(maxspace), val))
        print("---------------------------------------------------")
Example #7
class Trainer(TrainerMeta):
    def __init__(self, model, debug=False, tuning=False):
        self.debug = debug
        self.model = model
        self.config = self.model.config
        self.training_results = []
        self.gen_train = None
        self.tuning = tuning

    def build_model(self):
        """function to build the model"""
        self.model.def_inputs()
        self.model.def_parameters()
        if getattr(self.model, "def_layer", None):
            self.model.def_layer()
        self.model.def_loss()

        if not self.debug:
            self.sess = tf.Session(config=self.config.gpu_config)
        else:
            self.sess = tf.InteractiveSession()
        self.global_step = tf.Variable(0, name="global_step", trainable=False)

        if self.config.optimizer == 'sgd':
            optimizer = tf.train.GradientDescentOptimizer(
                learning_rate=self.config.learning_rate)
        elif self.config.optimizer == 'rms':
            optimizer = tf.train.RMSPropOptimizer(
                learning_rate=self.config.learning_rate)
        elif self.config.optimizer == 'adam':
            optimizer = tf.train.AdamOptimizer(
                learning_rate=self.config.learning_rate)
        elif self.config.optimizer == 'adagrad':
            optimizer = tf.train.AdagradOptimizer(
                learning_rate=self.config.learning_rate)
        else:
            raise NotImplementedError("No support for %s optimizer" %
                                      self.config.optimizer)

        grads = optimizer.compute_gradients(self.model.loss)
        self.op_train = optimizer.apply_gradients(grads,
                                                  global_step=self.global_step)
        self.sess.run(tf.global_variables_initializer())

        if not self.tuning:
            self.summary()
            self.summary_hyperparameter()

    ''' Training related functions:'''

    def train_model(self):
        """function to train the model"""
        loss = 0

        if self.config.loadFromData:
            self.load_model()
        else:
            generator_config = GeneratorConfig(
                data='train',
                algo=self.model.model_name,
                batch_size=self.model.config.batch_size)
            self.gen_train = Generator(config=generator_config,
                                       model_config=self.model.config)

            if not self.tuning:
                self.evaluator = Evaluation(model=self.model, debug=self.debug)

            for n_iter in range(self.config.epochs):
                loss = self.train_model_epoch(n_iter)
                if not self.tuning:
                    self.test(n_iter)

            self.gen_train.stop()

            if not self.tuning:
                self.evaluator.save_training_result(self.training_results)
                self.evaluator.stop()

            if self.config.save_model:
                self.save_model()

        if self.config.disp_result:
            self.display()

        if self.config.disp_summary:
            self.summary()
            self.summary_hyperparameter()

        self.sess.close()
        tf.reset_default_graph()  # Reset the default graph for the next training task.

        return loss

    def tune_model(self):
        """function to tune the model"""
        acc = 0

        generator_config = GeneratorConfig(
            data='train',
            algo=self.model.model_name,
            batch_size=self.model.config.batch_size)
        self.gen_train = Generator(config=generator_config,
                                   model_config=self.model.config)

        self.evaluator = Evaluation(model=self.model,
                                    debug=self.debug,
                                    tuning=True)

        for n_iter in range(self.config.epochs):
            self.train_model_epoch(n_iter)

        self.gen_train.stop()
        self.evaluator.test_batch(self.sess, n_iter)
        acc = self.evaluator.output_queue.get()
        self.evaluator.stop()
        self.sess.close()
        tf.reset_default_graph()  # Reset the default graph for the next training task.

        return acc

    def train_model_epoch(self, epoch_idx):
        """Function to train the model for one epoch."""
        acc_loss = 0

        # Run only a few batches per epoch when debugging.
        num_batch = (self.model.config.kg_meta.tot_train_triples //
                     self.config.batch_size if not self.debug else 10)

        start_time = timeit.default_timer()

        for batch_idx in range(num_batch):
            data = list(next(self.gen_train))
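            # Multi-label models feed hr_t/rt_h targets; all others feed
            # positive/negative triple pairs (see the two branches below).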
            if self.model.model_name.lower() in [
                    "tucker", "tucker_v2", "conve", "complex", "distmult",
                    "proje_pointwise"
            ]:
                h = data[0]
                r = data[1]
                t = data[2]
                hr_t = data[3]
                rt_h = data[4]

                feed_dict = {
                    self.model.h: h,
                    self.model.r: r,
                    self.model.t: t,
                    self.model.hr_t: hr_t,
                    self.model.rt_h: rt_h
                }
            else:
                ph = data[0]
                pr = data[1]
                pt = data[2]
                nh = data[3]
                nr = data[4]
                nt = data[5]

                feed_dict = {
                    self.model.pos_h: ph,
                    self.model.pos_t: pt,
                    self.model.pos_r: pr,
                    self.model.neg_h: nh,
                    self.model.neg_t: nt,
                    self.model.neg_r: nr
                }

            _, step, loss = self.sess.run(
                [self.op_train, self.global_step, self.model.loss], feed_dict)

            acc_loss += loss

            if not self.tuning:
                print('[%.2f sec](%d/%d): -- loss: %.5f' %
                      (timeit.default_timer() - start_time, batch_idx,
                       num_batch, loss),
                      end='\r')
        if not self.tuning:
            print('iter[%d] ---Train Loss: %.5f ---time: %.2f' %
                  (epoch_idx, acc_loss, timeit.default_timer() - start_time))

        self.training_results.append([epoch_idx, acc_loss])

        return acc_loss

    ''' Testing related functions:'''

    def test(self, curr_epoch):
        """Function to test the model.

           Args:
                curr_epoch (int): The current epoch number.
        """
        if not self.config.full_test_flag and (
                curr_epoch % self.config.test_step == 0 or curr_epoch == 0
                or curr_epoch == self.config.epochs - 1):
            self.evaluator.test_batch(self.sess, curr_epoch)
        elif curr_epoch == self.config.epochs - 1:
            self.evaluator.test_batch(self.sess, curr_epoch)

    ''' Procedural functions:'''

    def save_model(self):
        """function to save the model"""
        saved_path = self.config.tmp / self.model.model_name
        saved_path.mkdir(parents=True, exist_ok=True)

        saver = tf.train.Saver(self.model.parameter_list)
        saver.save(self.sess, str(saved_path / 'model.vec'))

    def load_model(self):
        """function to load the model"""
        saved_path = self.config.tmp / self.model.model_name
        if saved_path.exists():
            saver = tf.train.Saver(self.model.parameter_list)
            saver.restore(self.sess, str(saved_path / 'model.vec'))

    def display(self):
        """function to display embedding"""
        options = {
            "ent_only_plot": True,
            "rel_only_plot": not self.config.plot_entity_only,
            "ent_and_rel_plot": not self.config.plot_entity_only
        }

        if self.config.plot_embedding:
            viz = Visualization(model=self.model, vis_opts=options)

            viz.plot_embedding(sess=self.sess,
                               resultpath=self.config.figures,
                               algos=self.model.model_name,
                               show_label=False)

        if self.config.plot_training_result:
            viz = Visualization(model=self.model)
            viz.plot_train_result()

        if self.config.plot_testing_result:
            viz = Visualization(model=self.model)
            viz.plot_test_result()

    def summary(self):
        """function to print the summary"""
        print("\n------------------Global Setting--------------------")
        # Pad keys to the length of the longest key plus 20 extra spaces.
        maxspace = max(len(k) for k in self.config.__dict__.keys()) + 20
        for key, val in self.config.__dict__.items():
            if key in self.config.__dict__['hyperparameters']:
                continue

            if isinstance(val, (KGMetaData, KnowledgeGraph)) or key.startswith(
                    'gpu') or key.startswith('hyperparameters'):
                continue

            print("%s : %s" % (key.rjust(maxspace), val))
        print("---------------------------------------------------")

    def summary_hyperparameter(self):
        """function to print the hyperparameter summary"""
        print("\n-----------%s Hyperparameter Setting-------------" %
              (self.model.model_name))
        maxspace = max(len(k) for k in self.config.hyperparameters.keys()) + 15
        for key, val in self.config.hyperparameters.items():
            print("%s : %s" % (key.rjust(maxspace), val))
        print("---------------------------------------------------")