def __init__(self, config, train_data):
  """
  :param Config.Config config:
  :param Dataset train_data:
  """
  self.config = config
  self.opts = CollectionReadCheckCovered(config.get_of_type("hyper_param_tuning", dict, {}))
  self.log = log.v1
  # Use a fixed static copy of the training data, so that every individual
  # is evaluated on exactly the same sequences.
  train_data.init_seq_order(epoch=1)
  self.train_data = StaticDataset.copy_from_dataset(
    train_data, max_seqs=self.opts.get("num_train_steps", 100))
  self.hyper_params = []  # type: list[HyperParam]
  self._find_hyper_params()
  if not self.hyper_params:
    raise Exception("No hyper params found.")
  self.hyper_params.sort(key=lambda p: p.unique_idx)
  print("We have found these hyper params:")
  for p in self.hyper_params:
    print(" %s" % p.description())
  self.dry_run_first_individual = self.opts.get("dry_run_first_individual", True)
  self.num_iterations = self.opts["num_tune_iterations"]
  self.num_individuals = self.opts["num_individuals"]
  self.num_kill_individuals = self.opts.get(
    "num_kill_individuals", self.num_individuals // 2)
  self.num_best = self.opts.get("num_best", 10)
  self.num_threads = self.opts.get("num_threads", guess_requested_max_num_threads())
  # CollectionReadCheckCovered raises if any option key was never read,
  # i.e. this fails early on unknown/misspelled config entries.
  self.opts.assert_all_read()
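# Example (illustrative, not from the source; key names are taken from the
# reads in __init__ above): a minimal "hyper_param_tuning" config dict as
# consumed via CollectionReadCheckCovered. Only "num_tune_iterations" and
# "num_individuals" are required; the other values shown are the defaults
# applied above.
example_hyper_param_tuning_opts = {
  "num_tune_iterations": 10,         # required by __init__
  "num_individuals": 32,             # required by __init__
  "num_train_steps": 100,            # max_seqs for the static train copy (default 100)
  "num_kill_individuals": 16,        # default: num_individuals // 2
  "num_best": 10,                    # default: 10
  "dry_run_first_individual": True,  # default: True
  "num_threads": 4,                  # default: guess_requested_max_num_threads()
}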
def run(self):
  # Memoization: if this individual was already trained, reuse its cost.
  if self.individual.cost is not None:
    return self.individual.cost
  start_time = time.time()
  hyper_param_mapping = self.individual.hyper_param_mapping
  print("Training %r using hyper params:" % self.individual.name, file=log.v2)
  for p in self.optim.hyper_params:
    print(" %s -> %s" % (p.description(), hyper_param_mapping[p]), file=log.v2)
  config = self.optim.create_config_instance(hyper_param_mapping, gpu_ids=self.gpu_ids)
  engine = Engine(config=config)
  train_data = StaticDataset.copy_from_dataset(self.optim.train_data)
  engine.init_train_from_config(config=config, train_data=train_data)
  # Not directly calling train() as we want to have full control.
  engine.epoch = 1
  train_data.init_seq_order(epoch=engine.epoch)
  batches = train_data.generate_batches(
    recurrent_net=engine.network.recurrent,
    batch_size=engine.batch_size,
    max_seqs=engine.max_seqs,
    max_seq_length=int(engine.max_seq_length),
    seq_drop=engine.seq_drop,
    shuffle_batches=engine.shuffle_batches,
    used_data_keys=engine.network.used_data_keys)
  engine.updater.set_learning_rate(engine.learning_rate, session=engine.tf_session)
  trainer = Runner(engine=engine, dataset=train_data, batches=batches, train=True)
  self.runner = trainer
  if self.cancel_flag:
    raise CancelTrainingException("Trainer cancel flag is set")
  trainer.run(report_prefix="hyper param tune train %r" % self.individual.name)
  if not trainer.finalized:
    print("Trainer exception:", trainer.run_exception, file=log.v1)
    raise trainer.run_exception
  cost = trainer.score["cost:output"]
  print(
    "Individual %s:" % self.individual.name,
    "Train cost:", cost,
    "elapsed time:", hms_fraction(time.time() - start_time),
    file=self.optim.log)
  self.individual.cost = cost
  # Return the cost on this path as well, consistent with the memoized
  # early return at the top.
  return cost
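# Usage sketch (illustrative; the enclosing class name and its constructor
# signature are assumptions, as they are not shown in this excerpt). run()
# memoizes its result in individual.cost, so a repeated call returns
# immediately without retraining, and cancel_flag is checked once before
# the trainer starts:
#
#   trainer = IndividualTrainer(optim=optim, individual=individual, gpu_ids={0})
#   trainer.cancel_flag = False  # set to True from another thread to abort
#   trainer.run()                # trains once, stores individual.cost
#   trainer.run()                # no-op: returns the cached individual.cost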