def __init__(self, sentences, extra=None, **kwargs):
    self.sentences = sentences
    self.config = config = get_config(get_default_config(), **kwargs)
    # Avoid a mutable default argument for `extra`.
    self.extra = extra if extra is not None else {}
    self.loader = None
    self.cuda = config.get('cuda')
    self.logger = get_logger()

def run(options):
    logger = get_logger()
    experiment_logger = ExperimentLogger()

    train_dataset, validation_dataset = get_train_and_validation(options)
    train_iterator = get_train_iterator(options, train_dataset)
    validation_iterator = get_validation_iterator(options, validation_dataset)
    embeddings = train_dataset['embeddings']
    word2idx = train_dataset['word2idx']

    logger.info('Initializing model.')
    trainer = build_net(options, embeddings, train_iterator, word2idx=word2idx)

    logger.info('Model:')
    for name, p in trainer.net.named_parameters():
        logger.info('{} {} {}'.format(name, p.shape, p.requires_grad))

    # Evaluation.
    context = {}
    context['dataset'] = validation_dataset
    context['batch_iterator'] = validation_iterator
    model_evaluation = ModelEvaluation(get_eval_components(options, context, config_lst=options.eval_config))

    if options.eval_only_mode:
        info = dict()
        info['experiment_path'] = options.experiment_path
        info['step'] = 0
        run_evaluation(options, trainer, model_evaluation, info, metadata=dict(step=0))
        sys.exit()

    if options.save_init:
        logger.info('Saving model (init).')
        trainer.save_model(os.path.join(options.experiment_path, 'model_init.pt'))

    run_train(options, train_iterator, trainer, model_evaluation)

def __init__(self, data_source, batch_size, include_partial=False, rng=None):
    self.data_source = data_source
    self.active = False
    if rng is None:
        rng = np.random.RandomState(seed=11)
    self.rng = rng
    self.batch_size = batch_size
    self.include_partial = include_partial
    self._batch_size_cache = {0: self.batch_size}
    self.logger = get_logger()

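# Usage sketch (illustrative; the enclosing class name is not shown above, so
# `SomeBatchSampler` is a placeholder): passing an explicit RandomState makes
# the shuffling reproducible across runs; otherwise a fixed seed of 11 is used.
#
#     sampler = SomeBatchSampler(dataset, batch_size=32, include_partial=True,
#                                rng=np.random.RandomState(seed=1))
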
def configure(options):
    # Configure output paths for this experiment.
    configure_experiment(options.experiment_path)

    # Get logger.
    logger = get_logger()

    # Print flags.
    logger.info(stringify_flags(options))
    save_flags(options, options.experiment_path)

def run_evaluation(options, trainer, model_evaluation, info, metadata):
    logger = get_logger()
    lst = []
    for eval_result_dict in model_evaluation.run(trainer, info, metadata):
        eval_result = eval_result_dict['result']
        eval_name = eval_result['name']
        for k, v in eval_result['meta'].items():
            logger.info('eval[{}] {}={}'.format(eval_name, k, v))
        lst.append(eval_result_dict)
    return lst

def __init__(self, **kwargs):
    super().__init__()

    # defaults
    self.enabled = True
    self.is_initialized = True
    self.write = False
    self.logger = get_logger()
    self.init_defaults()

    # override
    for k, v in kwargs.items():
        setattr(self, k, v)

def build_net(options, context=None, net_components=None):
    logger = get_logger()
    logger.info('build net')

    # Avoid mutable default arguments.
    if context is None:
        context = {}
    if net_components is None:
        net_components = {}

    # Components.
    projection_layer = net_components['projection_layer']
    diora = net_components['diora']
    loss_funcs = net_components['loss_funcs']

    # Context.
    cuda = context['cuda']
    lr = options.lr
    word2idx = context['word2idx']

    # Net.
    net = Net(projection_layer, diora, loss_funcs=loss_funcs, size=options.hidden_dim)

    # Load model.
    if options.load_model_path is not None:
        logger.info('Loading model: {}'.format(options.load_model_path))
        Trainer.load_model(net, options.load_model_path)

    # CUDA support.
    if cuda:
        net.cuda()
        diora.cuda()

    # Trainer.
    trainer_kwargs = {}
    trainer_kwargs['net'] = net
    trainer_kwargs['cuda'] = cuda
    trainer_kwargs['mlp_reg'] = options.mlp_reg
    trainer_kwargs['clip_threshold'] = options.clip_threshold
    trainer_kwargs['word2idx'] = word2idx
    trainer = Trainer(**trainer_kwargs)
    trainer.experiment_name = options.experiment_name
    trainer.init_optimizer(options)

    logger.info('Architecture =\n {}'.format(str(trainer.net)))

    return trainer

def get_iterator(self):
    options = self.options
    logger = get_logger()
    logger.info('Generating seeds for {} epochs using initial seed {}.'.format(options.max_epoch, options.seed))
    seeds = generate_seeds(options.max_epoch, options.seed)

    step = 0
    for epoch, seed in zip(range(options.max_epoch), seeds):
        logger.info('epoch={} seed={}'.format(epoch, seed))
        it = self.batch_iterator.get_iterator(random_seed=seed, epoch=epoch)
        wrap_it = self.get_iterator_standard(it)

        def wrap_again(it):
            # One-step lookahead: hold each batch back until its successor
            # arrives, so the final batch can be flagged as end-of-epoch.
            last_batch_idx, last_batch_map, end_of_epoch = None, None, False
            for batch_idx, batch_map in it:
                if last_batch_idx is not None:
                    yield last_batch_idx, last_batch_map, end_of_epoch
                last_batch_idx, last_batch_map = batch_idx, batch_map
            end_of_epoch = True
            yield last_batch_idx, last_batch_map, end_of_epoch

        for epoch_step, (batch_idx, batch_map, end_of_epoch) in enumerate(wrap_again(wrap_it)):
            should = {}
            should['end_of_epoch'] = end_of_epoch
            should['log'] = step % options.log_every_batch == 0
            should['periodic'] = step % options.save_latest == 0 and step >= options.save_after
            should['distinct'] = step % options.save_distinct == 0 and step >= options.save_after
            should_eval = (
                options.eval_every_batch > 0
                and step % options.eval_every_batch == 0
                and step >= options.eval_after
            )
            should_eval = should_eval or (
                end_of_epoch
                and epoch % options.eval_every_epoch == 0
                and step >= options.eval_after
            )
            should['eval'] = should_eval

            yield epoch, step, batch_idx, batch_map, should

            step += 1

            if options.max_step >= 0 and step >= options.max_step:
                logger.info('Max-Step={} Quitting.'.format(options.max_step))
                sys.exit()

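# A minimal, standalone sketch of the lookahead pattern used by wrap_again
# above (illustrative only; not part of the original module): buffer one item
# so the last element of any iterable can be tagged with a flag.
def with_last_flag(it):
    it = iter(it)
    try:
        prev = next(it)
    except StopIteration:
        return
    for item in it:
        yield prev, False
        prev = item
    yield prev, True

# Example: list(with_last_flag('abc')) == [('a', False), ('b', False), ('c', True)]
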
def build_net(options, embeddings, batch_iterator=None, word2idx=None):
    cuda = options.cuda

    context = {}
    context['cuda'] = cuda
    context['embeddings'] = embeddings
    context['batch_iterator'] = batch_iterator
    context['word2idx'] = word2idx

    net_components = get_net_components(options, context)
    trainer = trainer_build_net(options, context=context, net_components=net_components)

    logger = get_logger()
    logger.info('# of params = {}'.format(count_params(trainer.net)))

    return trainer

def __init__(self, **kwargs):
    super().__init__()

    # defaults
    self.enabled = True
    self.unregistered = {}
    self.logger = get_logger()
    self.init_defaults()

    # override
    for k, v in kwargs.items():
        if k in self.unregistered:
            self.unregistered[k] = v
            continue
        setattr(self, k, v)

    self.init_parameters()
    self.reset_parameters()

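# Illustrative, self-contained sketch of the defaults/override pattern above
# (class names are hypothetical, not from the original module): subclasses
# declare defaults in init_defaults(), keyword arguments override them, and
# keys pre-registered in `unregistered` are captured in that dict instead of
# becoming attributes.
class _SketchBase:
    def __init__(self, **kwargs):
        self.unregistered = {}
        self.init_defaults()
        for k, v in kwargs.items():
            if k in self.unregistered:
                self.unregistered[k] = v
                continue
            setattr(self, k, v)

class _SketchComponent(_SketchBase):
    def init_defaults(self):
        self.size = 400
        self.unregistered['debug_hook'] = None

assert _SketchComponent(size=800).size == 800
assert _SketchComponent(debug_hook=print).unregistered['debug_hook'] is print
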
def __init__(self, net=None, cuda=None, word2idx=None, mlp_reg=0, clip_threshold=5.0):
    super(Trainer, self).__init__()
    self.net = net
    self.optimizer = None
    self.optimizer_cls = None
    self.optimizer_kwargs = None
    self.cuda = cuda
    self.word2idx = word2idx
    self._embedding_keys = None
    self.mlp_reg = mlp_reg
    self.clip_threshold = clip_threshold
    self.logger = get_logger()

def __init__(self, name, kwargs_dict):
    self.name = name
    self.kwargs_dict = kwargs_dict
    self.logger = get_logger()

def run_train(options, train_iterator, trainer, model_evaluation):
    logger = get_logger()
    experiment_logger = ExperimentLogger()

    logger.info('Running train.')

    step = 0
    best_step = 0
    best_metric = math.inf
    best_dict = {}

    my_iterator = MyTrainIterator(options, train_iterator)

    for epoch, step, batch_idx, batch_map, should in my_iterator.get_iterator():
        # HACK: Weird fix that counteracts other libraries (i.e. allennlp) modifying
        # the global logger. Needed after adding tensorboard.
        while len(logger.parent.handlers) > 0:
            logger.parent.handlers.pop()

        batch_map['step'] = step
        result = trainer.step(batch_map)
        experiment_logger.record(result)
        del result

        if should['log']:
            metrics = experiment_logger.log_batch(epoch, step, batch_idx)

        # -- Periodic Checkpoints -- #

        if should['periodic']:
            logger.info('Saving model (periodic).')
            trainer.save_model(os.path.join(options.experiment_path, 'model_periodic.pt'))
            save_experiment(os.path.join(options.experiment_path, 'experiment_periodic.json'),
                dict(step=step, epoch=epoch, best_step=best_step, best_metric=best_metric))

        if should['distinct']:
            logger.info('Saving model (distinct).')
            trainer.save_model(os.path.join(options.experiment_path, 'model.step_{}.pt'.format(step)))
            save_experiment(os.path.join(options.experiment_path, 'experiment.step_{}.json'.format(step)),
                dict(step=step, epoch=epoch, best_step=best_step, best_metric=best_metric))

        # -- Validation -- #

        if should['eval']:
            logger.info('Evaluation.')

            info = dict()
            info['experiment_path'] = options.experiment_path
            info['step'] = step
            info['epoch'] = epoch

            for eval_result_dict in run_evaluation(options, trainer, model_evaluation, info, metadata=dict(step=step)):
                result = eval_result_dict['result']
                func = eval_result_dict['component']
                name = func.name

                for key, val, is_best in func.compare(best_dict, result):
                    best_dict_key = 'best__{}__{}'.format(name, key)

                    # Early stopping.
                    if is_best:
                        if best_dict_key in best_dict:
                            prev_val = best_dict[best_dict_key]['value']
                        else:
                            prev_val = None

                        # Update running result.
                        best_dict[best_dict_key] = {}
                        best_dict[best_dict_key]['eval'] = name
                        best_dict[best_dict_key]['metric'] = key
                        best_dict[best_dict_key]['value'] = val
                        best_dict[best_dict_key]['step'] = step
                        best_dict[best_dict_key]['epoch'] = epoch

                        logger.info('Recording, best eval, key = {}, val = {} -> {}, json = {}'.format(
                            best_dict_key, prev_val, val, json.dumps(best_dict[best_dict_key])))

                        if step >= options.save_after:
                            logger.info('Saving model, best eval, key = {}, json = {}'.format(
                                best_dict_key, json.dumps(best_dict[best_dict_key])))
                            logger.info('checkpoint_dir = {}'.format(options.experiment_path))

                            # Save result and model.
                            trainer.save_model(
                                os.path.join(options.experiment_path, 'model.{}.pt'.format(best_dict_key)))
                            save_experiment(os.path.join(options.experiment_path, 'experiment.{}.json'.format(best_dict_key)),
                                best_dict[best_dict_key])

        # END OF EPOCH
        if should['end_of_epoch']:
            experiment_logger.log_epoch(epoch, step)
            trainer.end_of_epoch(best_dict)

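# Hedged sketch (not the original implementation): the call site above implies
# each eval component exposes compare(best_dict, result) yielding
# (metric_key, value, is_best) tuples. A minimal lower-is-better version could
# look like this; the 'loss' key and the `self.name` attribute are assumptions
# inferred from how run_train and run_evaluation use the results.
def compare(self, best_dict, result):
    key = 'loss'
    val = result['meta'][key]
    best_dict_key = 'best__{}__{}'.format(self.name, key)
    prev = best_dict.get(best_dict_key, {}).get('value')
    is_best = prev is None or val < prev
    yield key, val, is_best
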
def __init__(self, datasets):
    self.datasets = datasets
    self.logger = get_logger()

def __init__(self, reader):
    super(ReaderManager, self).__init__()
    self.reader = reader
    self.logger = get_logger()

def __init__(self, lowercase=True, filter_length=0, for_nopunct=False):
    self.lowercase = lowercase
    self.filter_length = filter_length
    self.for_nopunct = for_nopunct
    self.logger = get_logger()