import logging
import os
from time import time

import dill

# Project-local names used below (Vocab, TripletDataset, LabeledTripletDataset,
# SGD, Adagrad, DecaySGD, Evaluator, PairwiseTrainer, SingleTrainer) are assumed
# to be imported from this repo's utils/ and models/ packages elsewhere.


# --- Variant 1: resume training from a model pickled with dill ---
def train(args):
    # setting for logging: log to the console and to <args.log>/log
    if not os.path.exists(args.log):
        os.mkdir(args.log)
    logger = logging.getLogger()
    logging.basicConfig(level=logging.INFO)
    log_path = os.path.join(args.log, 'log')
    file_handler = logging.FileHandler(log_path)
    fmt = logging.Formatter('%(asctime)s %(levelname)s %(message)s')
    file_handler.setFormatter(fmt)
    logger.addHandler(file_handler)

    # TODO: develop the recording of arguments in logging
    logger.info('Arguments...')
    for arg, val in sorted(vars(args).items()):
        logger.info('{:>10} -----> {}'.format(arg, val))

    ent_vocab = Vocab.load(args.ent)
    rel_vocab = Vocab.load(args.rel)
    n_entity, n_relation = len(ent_vocab), len(rel_vocab)

    # preparing data
    logger.info('preparing data...')
    train_dat = TripletDataset.load(args.train, ent_vocab, rel_vocab)
    valid_dat = TripletDataset.load(args.valid, ent_vocab, rel_vocab) if args.valid else None

    if args.filtered:
        logger.info('loading whole graph...')
        from utils.graph import TensorTypeGraph
        whole_graph = TensorTypeGraph.load_from_raw(args.graphall, ent_vocab, rel_vocab)
    else:
        whole_graph = None

    if args.opt == 'sgd':
        opt = SGD(args.lr)
    elif args.opt == 'adagrad':
        opt = Adagrad(args.lr)
    else:
        raise NotImplementedError
    if args.l2_reg > 0:
        opt.set_l2_reg(args.l2_reg)
    if args.gradclip > 0:
        opt.set_gradclip(args.gradclip)

    logger.info('loading model...')
    with open(args.load, 'rb') as f:
        model = dill.load(f)

    # args.synthetic was dropped from this condition so the script runs without that flag
    evaluator = Evaluator(args.metric, args.nbest, args.filtered,
                          whole_graph) if args.valid else None
    if args.filtered and args.valid:
        evaluator.prepare_valid(valid_dat)

    if args.mode == 'pairwise':
        trainer = PairwiseTrainer(model=model, opt=opt, save_step=args.save_step,
                                  batchsize=args.batch, logger=logger,
                                  evaluator=evaluator, valid_dat=valid_dat,
                                  n_negative=args.negative, epoch=args.epoch,
                                  model_dir=args.log)
    elif args.mode == 'single':
        trainer = SingleTrainer(model=model, opt=opt, save_step=args.save_step,
                                batchsize=args.batch, logger=logger,
                                evaluator=evaluator, valid_dat=valid_dat,
                                n_negative=args.negative, epoch=args.epoch,
                                model_dir=args.log)
    else:
        raise NotImplementedError

    trainer.fit(train_dat)
    logger.info('done all')
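# ---------------------------------------------------------------------------
# Why filtered evaluation needs the whole graph: a sketch of the standard
# "filtered" ranking protocol (for intuition only; the actual logic lives in
# Evaluator/TensorTypeGraph). When ranking candidates for (s, r, ?), every
# entity that forms a known triplet anywhere in the graph, other than the
# gold one, is skipped so that true facts are not counted as ranking errors.
def filtered_rank(scores, gold_idx, known_idxs):
    # scores: score per candidate entity; known_idxs: entities that form
    # other true triplets with (s, r) and must be filtered out
    rank = 1
    for i, sc in enumerate(scores):
        if i == gold_idx or i in known_idxs:
            continue
        if sc > scores[gold_idx]:
            rank += 1
    return rank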
# --- Variant 2: KBC / triplet-classification tasks, ComplEx only ---
def train(args):
    # setting for logging
    if not os.path.exists(args.log):
        os.mkdir(args.log)
    logger = logging.getLogger()
    logging.basicConfig(level=logging.INFO)
    log_path = os.path.join(args.log, 'log')
    file_handler = logging.FileHandler(log_path)
    fmt = logging.Formatter('%(asctime)s %(levelname)s %(message)s')
    file_handler.setFormatter(fmt)
    logger.addHandler(file_handler)

    logger.info('Arguments...')
    for arg, val in sorted(vars(args).items()):
        logger.info('{:>10} -----> {}'.format(arg, val))

    ent_vocab = Vocab.load(args.ent)
    rel_vocab = Vocab.load(args.rel)
    n_entity, n_relation = len(ent_vocab), len(rel_vocab)

    if args.task == 'kbc':
        # knowledge base completion
        train_dat = TripletDataset.load(args.train, ent_vocab, rel_vocab)
        valid_dat = TripletDataset.load(args.valid, ent_vocab, rel_vocab) if args.valid else None
    elif args.task == 'tc':
        # triplet classification: validation triplets carry gold labels
        assert args.metric == 'acc'
        train_dat = TripletDataset.load(args.train, ent_vocab, rel_vocab)
        valid_dat = LabeledTripletDataset.load(args.valid, ent_vocab, rel_vocab) if args.valid else None
    else:
        raise ValueError('Invalid task: {}'.format(args.task))

    if args.opt == 'sgd':
        opt = SGD(args.lr)
    elif args.opt == 'adagrad':
        opt = Adagrad(args.lr)
    else:
        raise NotImplementedError
    if args.l2_reg > 0:
        opt.set_l2_reg(args.l2_reg)
    if args.gradclip > 0:
        opt.set_gradclip(args.gradclip)

    logger.info('building model...')
    if args.method == 'complex':
        from models.complex import ComplEx
        model = ComplEx(n_entity=n_entity, n_relation=n_relation,
                        margin=args.margin, dim=args.dim, mode=args.mode)
    else:
        raise NotImplementedError

    if args.filtered:
        logger.info('loading whole graph...')  # was a bare print(); routed through the logger
        from utils.graph import TensorTypeGraph
        graphall = TensorTypeGraph.load_from_raw(args.graphall, ent_vocab, rel_vocab)
    else:
        graphall = None

    evaluator = Evaluator(args.metric, args.nbest, args.filtered, True,
                          graphall) if args.valid or args.synthetic else None
    if args.filtered and args.valid:
        evaluator.prepare_valid(valid_dat)

    if args.mode == 'pairwise':
        trainer = PairwiseTrainer(model=model, opt=opt, save_step=args.save_step,
                                  batchsize=args.batch, logger=logger,
                                  evaluator=evaluator, valid_dat=valid_dat,
                                  n_negative=args.negative, epoch=args.epoch,
                                  model_dir=args.log, restart=args.restart)
    elif args.mode == 'single':
        trainer = SingleTrainer(model=model, opt=opt, save_step=args.save_step,
                                batchsize=args.batch, logger=logger,
                                evaluator=evaluator, valid_dat=valid_dat,
                                n_negative=args.negative, epoch=args.epoch,
                                model_dir=args.log, restart=args.restart)
    else:
        raise NotImplementedError

    trainer.fit(samples=train_dat)
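# ---------------------------------------------------------------------------
# A hedged sketch of the scoring function behind the ComplEx model built
# above (Trouillon et al., 2016): score(s, r, o) = Re(<w_r, e_s, conj(e_o)>).
# The array arguments here are illustrative assumptions; the repo's
# models/complex.py remains the authoritative implementation.
import numpy as np

def complex_score(s_re, s_im, r_re, r_im, o_re, o_im):
    # real part of the trilinear product of the complex embeddings,
    # summed over the embedding dimension
    return np.sum(r_re * s_re * o_re
                  + r_re * s_im * o_im
                  + r_im * s_re * o_im
                  - r_im * s_im * o_re, axis=-1)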
# --- Variant 3: selectable embedding methods, optional decaying-LR SGD ---
def train(args):
    # setting for logging
    if not os.path.exists(args.log):
        os.mkdir(args.log)
    logger = logging.getLogger()
    logging.basicConfig(level=logging.INFO)
    log_path = os.path.join(args.log, 'log')
    file_handler = logging.FileHandler(log_path)
    fmt = logging.Formatter('%(asctime)s %(levelname)s %(message)s')
    file_handler.setFormatter(fmt)
    logger.addHandler(file_handler)

    # TODO: develop the recording of arguments in logging
    logger.info('Arguments...')
    for arg, val in sorted(vars(args).items()):
        logger.info('{:>10} -----> {}'.format(arg, val))

    ent_vocab = Vocab.load(args.ent)
    rel_vocab = Vocab.load(args.rel)
    n_entity, n_relation = len(ent_vocab), len(rel_vocab)

    # preparing data
    logger.info('preparing data...')
    train_dat = TripletDataset.load(args.train, ent_vocab, rel_vocab)
    valid_dat = TripletDataset.load(args.valid, ent_vocab, rel_vocab) if args.valid else None

    if args.filtered:
        logger.info('loading whole graph...')
        from utils.graph import TensorTypeGraph
        whole_graph = TensorTypeGraph.load_from_raw(args.graphall, ent_vocab, rel_vocab)
    else:
        whole_graph = None

    if args.opt == 'sgd':
        opt = SGD(args.lr)
    elif args.opt == 'adagrad':
        opt = Adagrad(args.lr)
    elif args.opt == 'dsgd':
        opt = DecaySGD(args.lr)
    else:
        raise NotImplementedError
    if args.l2_reg > 0:
        opt.set_l2_reg(args.l2_reg)
    if args.gradclip > 0:
        opt.set_gradclip(args.gradclip)

    logger.info('building model...')
    if args.method == 'complex':
        from models.complex import ComplEx
        model = ComplEx(n_entity=n_entity, n_relation=n_relation,
                        margin=args.margin, dim=args.dim, mode=args.mode)
    elif args.method == 'distmult':
        from models.distmult import DistMult
        model = DistMult(n_entity=n_entity, n_relation=n_relation,
                         margin=args.margin, dim=args.dim, mode=args.mode)
    elif args.method == 'transe':
        from models.transe import TransE
        model = TransE(n_entity=n_entity, n_relation=n_relation,
                       margin=args.margin, dim=args.dim, mode=args.mode)
    elif args.method == 'hole':
        from models.hole import HolE
        model = HolE(n_entity=n_entity, n_relation=n_relation,
                     margin=args.margin, dim=args.dim, mode=args.mode)
    elif args.method == 'rescal':
        from models.rescal import RESCAL
        model = RESCAL(n_entity=n_entity, n_relation=n_relation,
                       margin=args.margin, dim=args.dim, mode=args.mode)
    elif args.method == 'analogy':
        from models.analogy import ANALOGY
        model = ANALOGY(n_entity=n_entity, n_relation=n_relation,
                        margin=args.margin, dim=args.dim,
                        cp_ratio=args.cp_ratio, mode=args.mode)
    elif args.method == 'transe_set':
        from models.transe_set import TransE_set
        model = TransE_set(n_entity=n_entity, n_relation=n_relation,
                           margin=args.margin, dim=args.dim, mode=args.mode)
    elif args.method == 'line':
        from models.line_model import LineModel
        model = LineModel(n_entity=n_entity, n_relation=n_relation,
                          margin=args.margin, dim=args.dim, mode=args.mode)
    else:
        raise NotImplementedError

    # args.synthetic was dropped from this condition so the script runs without that flag
    evaluator = Evaluator(args.metric, args.nbest, args.filtered,
                          whole_graph) if args.valid else None
    if args.filtered and args.valid:
        evaluator.prepare_valid(valid_dat)

    if args.mode == 'pairwise':
        trainer = PairwiseTrainer(model=model, opt=opt, save_step=args.save_step,
                                  batchsize=args.batch, logger=logger,
                                  evaluator=evaluator, valid_dat=valid_dat,
                                  n_negative=args.negative, epoch=args.epoch,
                                  model_dir=args.log)
    elif args.mode == 'single':
        trainer = SingleTrainer(model=model, opt=opt, save_step=args.save_step,
                                batchsize=args.batch, logger=logger,
                                evaluator=evaluator, valid_dat=valid_dat,
                                n_negative=args.negative, epoch=args.epoch,
                                model_dir=args.log)
    else:
        raise NotImplementedError

    trainer.fit(train_dat)
    logger.info('done all')
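# ---------------------------------------------------------------------------
# For intuition only: the objectives that the 'pairwise' and 'single' modes
# typically correspond to. This is an assumption about what PairwiseTrainer
# and SingleTrainer optimize, not a copy of their code; margin matches the
# args.margin handed to the models above.
import numpy as np

def pairwise_margin_loss(pos_score, neg_score, margin=1.0):
    # hinge on the score gap between a true triplet and a corrupted one
    return np.maximum(0.0, margin - pos_score + neg_score)

def single_softplus_loss(score, label):
    # label is +1 for observed triplets, -1 for sampled negatives
    return np.log1p(np.exp(-label * score))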
# --- Variant 4: adds unsupervised random-walk and logistic-regression baselines ---
def train(args):
    # setting for logging
    if not os.path.exists(args.log):
        os.mkdir(args.log)
    logger = logging.getLogger()
    logging.basicConfig(level=logging.INFO)
    log_path = os.path.join(args.log, 'log')
    file_handler = logging.FileHandler(log_path)
    fmt = logging.Formatter('%(asctime)s %(levelname)s %(message)s')
    file_handler.setFormatter(fmt)
    logger.addHandler(file_handler)

    # TODO: develop the recording of arguments in logging
    logger.info('Arguments...')
    for arg, val in vars(args).items():
        logger.info('{:>10} -----> {}'.format(arg, val))

    ent_vocab = Vocab.load(args.ent)
    rel_vocab = Vocab.load(args.rel)
    n_entity, n_relation = len(ent_vocab), len(rel_vocab)

    # preparing data
    logger.info('preparing data...')
    train_dat = TripletDataset.load(args.train, ent_vocab, rel_vocab)
    valid_dat = TripletDataset.load(args.valid, ent_vocab, rel_vocab) if args.valid else None

    if args.filtered:
        logger.info('loading whole graph...')
        from utils.graph import TensorTypeGraph
        whole_graph = TensorTypeGraph.load_from_raw(args.graphall, ent_vocab, rel_vocab)
    else:
        whole_graph = None

    if args.opt == 'sgd':
        opt = SGD(args.lr)
    elif args.opt == 'adagrad':
        opt = Adagrad(args.lr)
    else:
        raise NotImplementedError
    if args.l2_reg > 0:
        opt.set_l2_reg(args.l2_reg)
    if args.gradclip > 0:
        opt.set_gradclip(args.gradclip)

    logger.info('building model...')
    if args.method == 'complex':
        from models.complex import ComplEx
        model = ComplEx(n_entity=n_entity, n_relation=n_relation,
                        margin=args.margin, dim=args.dim, mode=args.mode)
    elif args.method == 'distmult':
        from models.distmult import DistMult
        model = DistMult(n_entity=n_entity, n_relation=n_relation,
                         margin=args.margin, dim=args.dim, mode=args.mode)
    elif args.method == 'transe':
        from models.transe import TransE
        model = TransE(n_entity=n_entity, n_relation=n_relation,
                       margin=args.margin, dim=args.dim, mode=args.mode)
    elif args.method == 'hole':
        from models.hole import HolE
        model = HolE(n_entity=n_entity, n_relation=n_relation,
                     margin=args.margin, dim=args.dim, mode=args.mode)
    elif args.method == 'rescal':
        from models.rescal import RESCAL
        model = RESCAL(n_entity=n_entity, n_relation=n_relation,
                       margin=args.margin, dim=args.dim, mode=args.mode)
    elif args.method == 'analogy':
        from models.analogy import ANALOGY
        model = ANALOGY(n_entity=n_entity, n_relation=n_relation,
                        margin=args.margin, dim=args.dim,
                        cp_ratio=args.cp_ratio, mode=args.mode)
    elif args.method == 'randwalk':
        from models.randwalk import RandWalk
        logger.info('using random walk model to learn embeddings in an unsupervised manner')
        model = RandWalk(n_entity=n_entity, n_relation=n_relation,
                         knowledge_path=args.train, ent_vocab=ent_vocab,
                         rel_vocab=rel_vocab, dim=args.dim, output=args.log)
        model.train()
        model.save_model(os.path.join(args.log, model.__class__.__name__))
        return
    elif args.method == 'lr':
        from models.lr import LogisticReg
        model = LogisticReg(n_entity=n_entity, n_relation=n_relation,
                            train_path=args.train, ent_vocab=ent_vocab,
                            rel_vocab=rel_vocab, dim=args.dim, output=args.log,
                            wv_model_path=args.wv_model, negative=args.negative,
                            feat_type=args.feat_type)
        starttime = time()
        if args.mode == 'triplet_cls':
            logger.info('Training a triple classifier')
            model.train_triple_classifer()  # note: method name spelled this way in models.lr
        else:
            model.train()
        endtime = time()
        logger.info('lr model train time {:.6f}'.format(endtime - starttime))
        model.save_model(os.path.join(args.log, model.__class__.__name__))
        return
    else:
        raise NotImplementedError

    evaluator = Evaluator(args.metric, args.nbest, args.filtered,
                          whole_graph) if args.valid or args.synthetic else None
    if args.filtered and args.valid:
        evaluator.prepare_valid(valid_dat)

    if args.mode == 'pairwise':
        trainer = PairwiseTrainer(model=model, opt=opt, save_step=args.save_step,
                                  batchsize=args.batch, logger=logger,
                                  evaluator=evaluator, valid_dat=valid_dat,
                                  n_negative=args.negative, epoch=args.epoch,
                                  model_dir=args.log)
    elif args.mode == 'single':
        trainer = SingleTrainer(model=model, opt=opt, save_step=args.save_step,
                                batchsize=args.batch, logger=logger,
                                evaluator=evaluator, valid_dat=valid_dat,
                                n_negative=args.negative, epoch=args.epoch,
                                model_dir=args.log)
    else:
        raise NotImplementedError

    trainer.fit(train_dat)
    logger.info('done all')
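# ---------------------------------------------------------------------------
# A minimal sketch (not part of the original file) of the argparse front-end
# these train() variants imply. Flag names mirror the attributes the
# functions read; defaults and choices are illustrative assumptions only.
import argparse

def parse_args():
    p = argparse.ArgumentParser(description='train knowledge-graph embeddings')
    p.add_argument('--ent', required=True)           # entity vocabulary file
    p.add_argument('--rel', required=True)           # relation vocabulary file
    p.add_argument('--train', required=True)         # training triplets
    p.add_argument('--valid', default=None)          # validation triplets (optional)
    p.add_argument('--log', default='results')      # log / model output directory
    p.add_argument('--method', default='complex',
                   choices=['complex', 'distmult', 'transe', 'hole',
                            'rescal', 'analogy', 'randwalk', 'lr'])
    p.add_argument('--mode', default='pairwise', choices=['pairwise', 'single'])
    p.add_argument('--opt', default='adagrad', choices=['sgd', 'adagrad'])
    p.add_argument('--lr', type=float, default=0.05)
    p.add_argument('--l2_reg', type=float, default=0.0)
    p.add_argument('--gradclip', type=float, default=0.0)
    p.add_argument('--dim', type=int, default=200)
    p.add_argument('--margin', type=float, default=1.0)
    p.add_argument('--cp_ratio', type=float, default=0.5)  # ANALOGY only
    p.add_argument('--epoch', type=int, default=300)
    p.add_argument('--batch', type=int, default=128)
    p.add_argument('--negative', type=int, default=5)
    p.add_argument('--save_step', type=int, default=30)
    p.add_argument('--metric', default='mrr')
    p.add_argument('--nbest', type=int, default=None)
    p.add_argument('--filtered', action='store_true')
    p.add_argument('--graphall', default=None)       # whole graph for filtered eval
    p.add_argument('--synthetic', default=None)
    p.add_argument('--wv_model', default=None)       # lr baseline only
    p.add_argument('--feat_type', default=None)      # lr baseline only
    return p.parse_args()


if __name__ == '__main__':
    train(parse_args())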