def _gradient_calculation(self, true_batchs, examples, total_stats, report_stats, step): self.model.zero_grad() for batch in true_batchs: loss = self.model(batch) # Topic Model loss topic_stats = Statistics(topic_loss=loss.clone().item() / float(examples)) loss.div(float(examples)).backward(retain_graph=False) total_stats.update(topic_stats) report_stats.update(topic_stats) if step % 1000 == 0: for k in range(self.args.topic_num): logger.info(','.join([ self.model.voc_id_wrapper.i2w(i) for i in self.model.topic_model.tm1.beta.topk(20, dim=-1) [1][k].tolist() ])) # in case of multi step gradient accumulation, # update only after accum batches if self.n_gpu > 1: grads = [ p.grad.data for p in self.model.parameters() if p.requires_grad and p.grad is not None ] distributed.all_reduce_and_rescale_tensors(grads, float(1)) for o in self.optims: o.step()
def poll(self):
    """Take one touch reading, fire the callback if needed, update the window.

    The new sample is averaged with the two most recent stored readings.
    When that smoothed value is below ``get_current_mean() * threshold``
    the callback is invoked, unless the previous trigger happened less than
    ``debounce_ms`` milliseconds ago. The smoothed value then replaces the
    oldest entry of ``self.readings``.

    A ``ValueError`` from the touch pin is logged and the poll is skipped
    without touching ``self.readings``.
    """
    try:
        value = self.touchpin.read()
    except ValueError:
        logger.error('Failed reading touchpin')
        return

    # Always divides by 3, i.e. assumes at least two stored readings.
    weighted_value = sum(self.readings[-2:] + [value]) / 3
    # The former ``weighted_value / mean`` ratio was only used by disabled
    # debug output and raised ZeroDivisionError for a zero mean, so it is
    # no longer computed.
    thresh = self.get_current_mean() * self.threshold

    if weighted_value < thresh:
        now = utime.ticks_ms()
        if utime.ticks_diff(now, self.callback_triggered_last) < self.debounce_ms:
            logger.info('Debounced')
        else:
            self.callback()
            self.callback_triggered_last = now

    # Slide the window: drop the oldest reading, keep the newest.
    self.readings.pop(0)
    self.readings.append(weighted_value)
def record(self, entry):
    """Records an entry in the history and logs it.

    The entry is appended to ``self.entries`` and a one-line summary built
    from its ``process``, ``action``, ``operation`` and ``values`` is logged
    at a level chosen by ``entry.action``: ``'invoke'`` -> warning,
    ``'ok'`` -> debug, ``'fail'`` -> error, ``'function'`` -> info.
    Entries with any other action are stored but not logged.
    """
    self.entries.append(entry)
    message = '[{}] {} {} ({})'.format(
        entry.process,
        entry.action,
        entry.operation,
        ', '.join(str(value) for value in entry.values))
    if entry.action == 'invoke':
        # ``warn`` is a deprecated alias; ``warning`` is the supported name.
        logger.warning(message)
    elif entry.action == 'ok':
        logger.debug(message)
    elif entry.action == 'fail':
        logger.error(message)
    elif entry.action == 'function':
        logger.info(message)
def build_trainer(args, device_id, model, optims, tokenizer):
    """
    Simplify `Trainer` creation based on the user options in `args`.

    Args:
        args: user options (usually from argument parsing). The attributes
            read here are `accum_count`, `world_size`, `gpu_ranks`,
            `model_path` and `report_every`; the whole object is also
            forwarded to `Trainer`.
        device_id (int): index into `args.gpu_ranks` when >= 0. A negative
            value selects rank 0 and passes a GPU count of 0 to `Trainer`.
        model: the model to train; forwarded to `Trainer`. When it is not
            None its parameter count is logged.
        optims: forwarded to `Trainer` unchanged.
        tokenizer: forwarded to `Trainer` unchanged.

    Returns:
        The constructed `Trainer`.
    """
    grad_accum_count = args.accum_count
    n_gpu = args.world_size

    if device_id >= 0:
        gpu_rank = int(args.gpu_ranks[device_id])
    else:
        gpu_rank = 0
        n_gpu = 0

    print('gpu_rank %d' % gpu_rank)

    # TensorBoard event files are written into the model directory.
    tensorboard_log_dir = args.model_path
    writer = SummaryWriter(tensorboard_log_dir, comment="Unmt")

    report_manager = ReportMgr(args.report_every, start_time=-1,
                               tensorboard_writer=writer)

    trainer = Trainer(args, model, optims, tokenizer, grad_accum_count,
                      n_gpu, gpu_rank, report_manager)

    # Test identity rather than truthiness: a module that defines
    # ``__len__`` (e.g. an empty container) would otherwise count as
    # "no model".
    if model is not None:
        n_params = _tally_parameters(model)
        logger.info('* number of parameters: %d' % n_params)

    return trainer
def poll(self):
    """Take one touch reading, fire the callback if needed, update the window.

    The new sample is averaged with the two most recent stored readings.
    When that smoothed value is below ``get_current_mean() * threshold``
    the callback is invoked, unless the previous trigger happened less than
    ``debounce_ms`` milliseconds ago. The smoothed value then replaces the
    oldest entry of ``self.readings``.

    A ``ValueError`` from the touch pin is logged and this poll cycle is
    skipped, leaving ``self.readings`` untouched.
    """
    try:
        value = self.touchpin.read()
    except ValueError:
        # A single failed read should not abort the caller's polling loop.
        logger.error('Failed reading touchpin')
        return

    # Always divides by 3, i.e. assumes at least two stored readings.
    weighted_value = sum(self.readings[-2:] + [value]) / 3
    mean = self.get_current_mean()
    thresh = mean * self.threshold
    # The ratio is only used for the debug line below; do not let a zero
    # mean turn that diagnostic into a ZeroDivisionError.
    ratio = weighted_value / mean if mean else 0
    logger.debug('{} {} {}'.format(mean, weighted_value, int(ratio*100)))

    if weighted_value < thresh:
        now = utime.ticks_ms()
        if utime.ticks_diff(now, self.callback_triggered_last) < self.debounce_ms:
            logger.info('Debounced')
        else:
            self.callback()
            self.callback_triggered_last = now

    # Slide the window: drop the oldest reading, keep the newest.
    self.readings.pop(0)
    self.readings.append(weighted_value)
def train(self, train_iter_fct, train_steps, valid_iter_fct=None, valid_steps=-1):
    """
    Run the main training loop.

    Batches are drawn from the iterator returned by `train_iter_fct`; once
    `self.grad_accum_count` batches have been collected for this process a
    gradient update is performed, progress is reported and, on rank 0, a
    checkpoint is saved every `self.save_checkpoint_steps` steps.

    Args:
        train_iter_fct(function): a function that returns the train
            iterator, e.g. `lambda: generator(*args, **kwargs)`. It is
            called again each time the iterator is exhausted.
        train_steps(int): last step to run; training starts from
            `self.optims[0]._step + 1`.
        valid_iter_fct(function): accepted but not used by this method.
        valid_steps(int): accepted but not used by this method.

    Return:
        The `Statistics` object accumulated over the whole run.
    """
    logger.info('Start training...')

    step = self.optims[0]._step + 1
    pending_batches = []
    accum = 0
    examples = 0
    train_iter = train_iter_fct()

    total_stats = Statistics()
    report_stats = Statistics()
    self._start_report_manager(start_time=total_stats.start_time)

    # NOTE(review): `step` only advances inside the inner loop, so an
    # iterator that never yields a batch keeps this outer loop spinning.
    while step <= train_steps:
        for i, batch in enumerate(train_iter):
            # With several GPUs each process only takes every n_gpu-th batch.
            if self.n_gpu != 0 and i % self.n_gpu != self.gpu_rank:
                continue

            pending_batches.append(batch)
            examples += batch.tgt.size(0)
            accum += 1
            if accum != self.grad_accum_count:
                continue

            if self.n_gpu > 1:
                # Normalise by the example count summed over all processes.
                examples = sum(distributed.all_gather_list(examples))

            self._gradient_calculation(
                pending_batches, examples, total_stats, report_stats, step)

            report_stats = self._maybe_report_training(
                step, train_steps, self.optims[0].learning_rate,
                report_stats)

            pending_batches, accum, examples = [], 0, 0

            if step % self.save_checkpoint_steps == 0 and self.gpu_rank == 0:
                self._save(step)

            step += 1
            if step > train_steps:
                break

        # Iterator exhausted (or training finished): start a fresh pass.
        train_iter = train_iter_fct()

    return total_stats